asi se fue xd

2026-02-18 12:13:22 -06:00
parent d27da30494
commit d0fdd04132
17 changed files with 612 additions and 735 deletions
--- a/server/routes/whisper.ts
+++ b/server/routes/whisper.ts
@@ -50,7 +50,7 @@ export async function handleWhisperRoutes(req: Request): Promise<Response | null
    return Response.json({
      ...result,
      ...state,
-      message: state.enabled ? 'Whisper enabled (GPU)' : 'Whisper disabled (using Web Speech API)'
+      message: state.running ? 'Whisper GPU running' : 'Whisper GPU starting...'
    })
  }

--- a/server/services/whisper.ts
+++ b/server/services/whisper.ts
@@ -1,6 +1,7 @@
 /**
- * Whisper Service - Manages the Python Whisper server process
- * Provides GPU-accelerated speech-to-text as an alternative to Web Speech API
+ * Whisper Service - Singleton persistent GPU speech-to-text server
+ * Auto-starts with the terminal server process and auto-restarts whenever the Whisper process exits.
+ * Single instance processes all client requests.
 */

 import { join } from 'path'
@@ -8,18 +9,19 @@ import { Subprocess } from 'bun'

 const WHISPER_PORT = 4104
 const WHISPER_SCRIPT = join(import.meta.dir, '..', 'whisper_server.py')
+const RESTART_DELAY_MS = 3000  // Wait before auto-restart after crash

 interface WhisperState {
  enabled: boolean
  running: boolean
-  starting: boolean  // Prevents multiple simultaneous start attempts
+  starting: boolean
  process: Subprocess | null
  model: string
  device: string
 }

 const state: WhisperState = {
-  enabled: false,
+  enabled: true,   // Always enabled by default
  running: false,
  starting: false,
  process: null,
@@ -32,104 +34,16 @@ const state: WhisperState = {
 */
 async function killProcessOnPort(port: number): Promise<void> {
  try {
-    // Use PowerShell to find and kill process on port
    const proc = Bun.spawn(['powershell', '-Command',
      `Get-NetTCPConnection -LocalPort ${port} -ErrorAction SilentlyContinue | ForEach-Object { Stop-Process -Id $_.OwningProcess -Force -ErrorAction SilentlyContinue }`
    ], { stdout: 'ignore', stderr: 'ignore' })
    await proc.exited
-    // Wait a moment for port to be released
    await new Promise(resolve => setTimeout(resolve, 1000))
  } catch {
    // Ignore errors
  }
 }

-/**
- * Start the Whisper Python server
- */
-export async function startWhisperServer(): Promise<boolean> {
-  // Prevent multiple simultaneous start attempts
-  if (state.starting) {
-    return false
-  }
-
-  if (state.running && state.process) {
-    return true
-  }
-
-  state.starting = true
-  console.log(`[Whisper] Starting (${state.model})...`)
-
-  // Kill any existing process on the port
-  await killProcessOnPort(WHISPER_PORT)
-
-  try {
-    // Use Bun.spawn with inherit to show logs directly in console
-    // -u flag disables Python output buffering for real-time logs
-    const proc = Bun.spawn(['python', '-u', WHISPER_SCRIPT], {
-      cwd: join(import.meta.dir, '..'),
-      stdout: 'inherit',
-      stderr: 'inherit',
-      env: { ...process.env, PYTHONUNBUFFERED: '1' }
-    })
-
-    state.process = proc
-
-    // Wait a bit for the server to start
-    await new Promise(resolve => setTimeout(resolve, 2000))
-
-    // Check if process is still running
-    if (proc.exitCode !== null) {
-      console.error('[Whisper] Process exited with code:', proc.exitCode)
-      state.process = null
-      state.starting = false
-      return false
-    }
-
-    // Check if WebSocket is ready
-    const isListening = await checkPort(WHISPER_PORT)
-
-    if (isListening) {
-      console.log('[Whisper] Ready')
-      state.running = true
-      state.enabled = true
-      state.starting = false
-      return true
-    }
-
-    // Wait more if model is still loading (up to 120 seconds total for large models)
-    for (let i = 0; i < 40; i++) {
-      await new Promise(resolve => setTimeout(resolve, 3000))
-
-      if (proc.exitCode !== null) {
-        console.error('[Whisper] Process died')
-        state.process = null
-        state.starting = false
-        return false
-      }
-
-      const ready = await checkPort(WHISPER_PORT)
-      if (ready) {
-        console.log('[Whisper] Ready')
-        state.running = true
-        state.enabled = true
-        state.starting = false
-        return true
-      }
-    }
-
-    console.error('[Whisper] Timeout (120s)')
-    state.starting = false
-    return false
-
-  } catch (err: any) {
-    console.error('[Whisper] Error:', err.message)
-    state.process = null
-    state.starting = false
-    return false
-  }
-}
-
 /**
 * Check if Whisper WebSocket is ready using PowerShell
 */
@@ -152,7 +66,125 @@ async function checkPort(port: number): Promise<boolean> {
 }

 /**
- * Stop the Whisper server
+ * Monitor the Whisper process and schedule a restart whenever it exits (crash, clean exit, or kill)
+ */
+function monitorProcess(proc: Subprocess) {
+  proc.exited.then((exitCode) => {
+    console.error(`[Whisper] Process exited with code ${exitCode}`)
+    state.process = null
+    state.running = false
+    state.starting = false
+
+    // Auto-restart after delay
+    console.log(`[Whisper] Auto-restarting in ${RESTART_DELAY_MS / 1000}s...`)
+    setTimeout(() => {
+      startWhisperServer().catch(err => {
+        console.error('[Whisper] Auto-restart failed:', err)
+      })
+    }, RESTART_DELAY_MS)
+  })
+}
+
+/**
+ * Start the Whisper Python server (singleton - only one instance)
+ */
+export async function startWhisperServer(): Promise<boolean> {
+  // Prevent multiple simultaneous start attempts
+  if (state.starting) {
+    console.log('[Whisper] Already starting, skipping')
+    return false
+  }
+
+  // Already running
+  if (state.running && state.process) {
+    console.log('[Whisper] Already running')
+    return true
+  }
+
+  // Check if an external instance is already listening
+  const alreadyListening = await checkPort(WHISPER_PORT)
+  if (alreadyListening) {
+    console.log('[Whisper] External instance already running on port', WHISPER_PORT)
+    state.running = true
+    state.enabled = true
+    return true
+  }
+
+  state.starting = true
+  console.log(`[Whisper] Starting singleton server (${state.model})...`)
+
+  // Kill any orphan process on the port
+  await killProcessOnPort(WHISPER_PORT)
+
+  try {
+    const proc = Bun.spawn(['python', '-u', WHISPER_SCRIPT], {
+      cwd: join(import.meta.dir, '..'),
+      stdout: 'inherit',
+      stderr: 'inherit',
+      env: { ...process.env, PYTHONUNBUFFERED: '1' }
+    })
+
+    state.process = proc
+
+    // Monitor for crashes and auto-restart
+    monitorProcess(proc)
+
+    // Wait for initial startup
+    await new Promise(resolve => setTimeout(resolve, 2000))
+
+    // Check if process died immediately
+    if (proc.exitCode !== null) {
+      console.error('[Whisper] Process exited immediately with code:', proc.exitCode)
+      state.process = null
+      state.starting = false
+      return false
+    }
+
+    // Check if WebSocket is ready
+    const isListening = await checkPort(WHISPER_PORT)
+    if (isListening) {
+      console.log('[Whisper] Server ready (GPU)')
+      state.running = true
+      state.enabled = true
+      state.starting = false
+      return true
+    }
+
+    // Wait for model loading (up to 120 seconds for large-v3)
+    for (let i = 0; i < 40; i++) {
+      await new Promise(resolve => setTimeout(resolve, 3000))
+
+      if (proc.exitCode !== null) {
+        console.error('[Whisper] Process died during model loading')
+        state.process = null
+        state.starting = false
+        return false
+      }
+
+      const ready = await checkPort(WHISPER_PORT)
+      if (ready) {
+        console.log('[Whisper] Server ready (GPU)')
+        state.running = true
+        state.enabled = true
+        state.starting = false
+        return true
+      }
+    }
+
+    console.error('[Whisper] Timeout waiting for server (120s)')
+    state.starting = false
+    return false
+
+  } catch (err: any) {
+    console.error('[Whisper] Start error:', err.message)
+    state.process = null
+    state.starting = false
+    return false
+  }
+}
+
+/**
+ * Stop the Whisper server (manual override, not used in normal flow; the exit monitor restarts it after RESTART_DELAY_MS)
 */
 export function stopWhisperServer(): boolean {
  if (!state.process) {
@@ -163,8 +195,7 @@ export function stopWhisperServer(): boolean {
    state.process.kill()
    state.process = null
    state.running = false
-    state.enabled = false
-    console.log('[Whisper] Stopped')
+    console.log('[Whisper] Stopped manually')
    return true
  } catch (err) {
    console.error('[Whisper] Stop error:', err)
@@ -173,27 +204,26 @@ export function stopWhisperServer(): boolean {
 }

 /**
- * Toggle Whisper server on/off (async - returns immediately when starting)
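+ * Toggle never stops the server - Whisper always stays on. If it is already running, only the current state is reported.
+ * If not running, triggers a start in the background (not awaited); callers get `starting: true` back immediately.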
 */
 export async function toggleWhisperServer(): Promise<{ enabled: boolean; success: boolean; starting: boolean }> {
-  // Prevent toggle while starting
  if (state.starting) {
-    return { enabled: false, success: false, starting: true }
+    return { enabled: true, success: false, starting: true }
  }

-  if (state.enabled && state.running) {
-    const success = stopWhisperServer()
-    return { enabled: false, success, starting: false }
-  } else {
-    // Start server in background - don't await
-    startWhisperServer().catch(err => {
-      console.error('[Whisper] Start error:', err)
-      state.starting = false
-    })
-
-    // Return immediately - frontend will poll for status
-    return { enabled: false, success: true, starting: true }
+  if (state.running) {
+    // Already running - just confirm it's on
+    return { enabled: true, success: true, starting: false }
  }
+
+  // Not running - start it
+  startWhisperServer().catch(err => {
+    console.error('[Whisper] Start error:', err)
+    state.starting = false
+  })
+
+  return { enabled: true, success: true, starting: true }
 }

 /**
@@ -217,13 +247,12 @@ export async function getWhisperState(): Promise<{
      state.enabled = true
    } else if (!isListening && state.running) {
      state.running = false
-      state.enabled = false
-      state.process = null
+      // Keep enabled=true since we auto-restart
    }
  }

  return {
-    enabled: state.enabled,
+    enabled: true,  // Always enabled
    running: state.running,
    starting: state.starting,
    port: WHISPER_PORT,
@@ -233,15 +262,12 @@ export async function getWhisperState(): Promise<{
 }

 /**
- * Check if Whisper is enabled
+ * Check if Whisper is running
 */
 export function isWhisperEnabled(): boolean {
-  return state.enabled && state.running
+  return state.running
 }

-// WebSocket server for Whisper (proxies to Python server or handles directly)
-let whisperWsServer: any = null
-
 export function getWhisperPort(): number {
  return WHISPER_PORT
 }
--- a/server/terminal.ts
+++ b/server/terminal.ts
@@ -3,10 +3,12 @@
 * Terminal Server - Independent process
 * This runs separately from the main server to maintain stable Claude Code sessions
 * even when the main server restarts due to code changes.
+ * Also manages the Whisper GPU server (singleton, persistent).
 */

 import { startTerminalServer } from './services/terminal'
 import { startSyncServer } from './services/sync-server'
+import { startWhisperServer } from './services/whisper'
 import { WORKING_DIR } from './config'

 console.log('')
@@ -14,6 +16,7 @@ console.log('='.repeat(50))
 console.log('Terminal Server (Independent Process)')
 console.log(`  Terminal WebSocket: ws://localhost:4103`)
 console.log(`  Sync WebSocket (Git + Torch): ws://localhost:4105`)
+console.log(`  Whisper GPU: ws://localhost:4104 (auto-start)`)
 console.log(`  Working Dir: ${WORKING_DIR}`)
 console.log('')
 console.log('This process is stable and won\'t restart')
@@ -23,3 +26,14 @@ console.log('')

 startTerminalServer()
 startSyncServer()
+
+// Auto-start Whisper GPU server (singleton, persistent, auto-restart on crash)
+startWhisperServer().then(ok => {
+  if (ok) {
+    console.log('[Whisper] GPU server started successfully')
+  } else {
+    console.warn('[Whisper] Failed initial start (will auto-retry)')
+  }
+}).catch(err => {
+  console.error('[Whisper] Boot error:', err)
+})
--- a/server/whisper_server.py
+++ b/server/whisper_server.py
@@ -26,35 +26,25 @@ except ImportError as e:
 def convert_audio_to_wav(input_data: bytes, input_format: str = "webm") -> bytes:
    """
    Convert audio data to WAV format using ffmpeg.
-    Whisper requires WAV/PCM format, but browsers typically record in WebM/Opus.
+    Uses stdin/stdout pipes so ffmpeg probes the actual data format
+    instead of relying on file extensions.
    """
-    # Create temp files for input and output
-    with tempfile.NamedTemporaryFile(suffix=f".{input_format}", delete=False) as in_file:
-        in_file.write(input_data)
-        input_path = in_file.name
-
-    output_path = input_path.replace(f".{input_format}", ".wav")
-
    try:
-        # Use ffmpeg to convert to WAV (16kHz mono, which Whisper prefers)
        result = subprocess.run([
-            "ffmpeg", "-y",  # Overwrite output
-            "-i", input_path,  # Input file
-            "-ar", "16000",  # Sample rate 16kHz
-            "-ac", "1",  # Mono
+            "ffmpeg", "-y",
+            "-i", "pipe:0",       # Read from stdin (auto-detect format)
+            "-ar", "16000",       # Sample rate 16kHz
+            "-ac", "1",           # Mono
            "-c:a", "pcm_s16le",  # PCM 16-bit little-endian
-            output_path
-        ], capture_output=True, text=True, timeout=30)
+            "-f", "wav",          # Output format
+            "pipe:1"              # Write to stdout
+        ], input=input_data, capture_output=True, timeout=30)

        if result.returncode != 0:
-            print(f"[Whisper] ffmpeg error: {result.stderr}")
+            print(f"[Whisper] ffmpeg error: {result.stderr.decode('utf-8', errors='replace')}")
            return None

-        # Read the converted WAV file
-        with open(output_path, "rb") as f:
-            wav_data = f.read()
-
-        return wav_data
+        return result.stdout

    except subprocess.TimeoutExpired:
        print("[Whisper] ffmpeg conversion timed out")
@@ -65,16 +55,6 @@ def convert_audio_to_wav(input_data: bytes, input_format: str = "webm") -> bytes
    except Exception as e:
        print(f"[Whisper] Conversion error: {e}")
        return None
-    finally:
-        # Cleanup temp files
-        try:
-            os.unlink(input_path)
-        except:
-            pass
-        try:
-            os.unlink(output_path)
-        except:
-            pass

 # Configuration
 HOST = "0.0.0.0"  # Listen on all interfaces (needed for Traefik proxy)