feat: Add Whisper GPU speech-to-text with progressive transcription

- Add faster-whisper Python server for GPU-accelerated transcription
- Support dual mode: Web Speech API or Whisper GPU (toggleable)
- Progressive transcription every 3 seconds while recording
- Separate terminal server process (stable during hot-reload)
- Add Ctrl+V paste and Ctrl+C copy support in FloatingTerminal
- Add MCP tools: whisper_start, whisper_stop, whisper_toggle, whisper_status
- Update package.json with separate api/terminal/frontend processes
2026-02-13 23:47:52 -06:00
parent e867b7873e
commit 638e6ac8e0
10 changed files with 1009 additions and 31 deletions
--- a/server/services/whisper.ts
+++ b/server/services/whisper.ts
@@ -0,0 +1,218 @@
+/**
+ * Whisper Service - Manages the Python Whisper server process
+ * Provides GPU-accelerated speech-to-text as an alternative to Web Speech API
+ */
+
+import { join } from 'path'
+import { Subprocess } from 'bun'
+
+// TCP port the Python Whisper server is expected to listen on; probed by
+// checkPort and cleared by killProcessOnPort before a new server is spawned.
+const WHISPER_PORT = 4104
+// Path to the Python entry point, resolved one directory above this module.
+const WHISPER_SCRIPT = join(import.meta.dir, '..', 'whisper_server.py')
+
+/** In-memory bookkeeping for the Whisper server managed by this module. */
+interface WhisperState {
+  /** Always written together with `running` by the start/stop/sync functions in this file. */
+  enabled: boolean
+  /** Whether the Whisper port was listening the last time this module checked (cleared on stop). */
+  running: boolean
+  /** Handle of the child process spawned by this module; null when none is tracked. */
+  process: Subprocess | null
+  /** Model name reported by getWhisperState. */
+  model: string
+  /** Device name reported by getWhisperState. */
+  device: string
+}
+
+// Module-level singleton; starts disabled with no tracked child process.
+// NOTE(review): `model` and `device` are only reported by getWhisperState in
+// this file and are not passed to the Python process when it is spawned —
+// confirm they match what whisper_server.py actually loads.
+const state: WhisperState = {
+  enabled: false,
+  running: false,
+  process: null,
+  model: 'medium',
+  device: 'cuda'
+}
+
+/**
+ * Force-stops every process that owns a TCP connection on the given local port.
+ *
+ * Shells out to PowerShell, waits for that command to finish, then pauses for
+ * one second so the port can be released. Best-effort: failures are swallowed.
+ */
+async function killProcessOnPort(port: number): Promise<void> {
+  const command = `Get-NetTCPConnection -LocalPort ${port} -ErrorAction SilentlyContinue | ForEach-Object { Stop-Process -Id $_.OwningProcess -Force -ErrorAction SilentlyContinue }`
+
+  try {
+    const killer = Bun.spawn(['powershell', '-Command', command], {
+      stdout: 'ignore',
+      stderr: 'ignore'
+    })
+    await killer.exited
+
+    // Give the OS a moment to release the port
+    await new Promise<void>(done => setTimeout(done, 1000))
+  } catch {
+    // Deliberately ignored: this cleanup is best-effort
+  }
+}
+
+/**
+ * Start the Whisper Python server and wait until its port is listening.
+ *
+ * Clears the port, spawns `python` on WHISPER_SCRIPT, then probes the port
+ * every 3 seconds: one initial probe followed by up to 30 more (roughly
+ * 93 seconds of waiting, plus the time the probes themselves take).
+ *
+ * @returns true if the server is listening (or was already tracked as
+ *   running); false if the process exited, the wait timed out, or spawning
+ *   threw. On every false path `state.process` is cleared, and on timeout the
+ *   still-loading child is killed so a retry cannot start a second copy
+ *   next to an orphaned one.
+ */
+export async function startWhisperServer(): Promise<boolean> {
+  if (state.running && state.process) {
+    console.log('[Whisper] Server already running')
+    return true
+  }
+
+  console.log('[Whisper] ====== STARTING (v3) ======')
+  console.log('[Whisper] Script:', WHISPER_SCRIPT)
+
+  // Kill any existing process on the port
+  console.log('[Whisper] Cleaning up port', WHISPER_PORT)
+  await killProcessOnPort(WHISPER_PORT)
+
+  const pollDelay = (): Promise<void> =>
+    new Promise(resolve => setTimeout(resolve, 3000))
+
+  try {
+    // Use Bun.spawn with inherit to show logs directly in console
+    const proc = Bun.spawn(['python', WHISPER_SCRIPT], {
+      cwd: join(import.meta.dir, '..'),
+      stdout: 'inherit',
+      stderr: 'inherit',
+      env: { ...process.env }
+    })
+
+    state.process = proc
+
+    // Wait a bit for the server to start, then check if port is listening
+    await pollDelay()
+
+    // exitCode stays null while the child is still alive
+    if (proc.exitCode !== null) {
+      console.error('[Whisper] Process exited with code:', proc.exitCode)
+      state.process = null
+      return false
+    }
+
+    if (await checkPort(WHISPER_PORT)) {
+      console.log('[Whisper] Server started successfully on port', WHISPER_PORT)
+      state.running = true
+      state.enabled = true
+      return true
+    }
+
+    // Not listening yet: keep polling while the model loads (30 more probes)
+    console.log('[Whisper] Waiting for model to load...')
+    for (let attempt = 0; attempt < 30; attempt++) {
+      await pollDelay()
+
+      if (proc.exitCode !== null) {
+        console.error('[Whisper] Process died while loading')
+        state.process = null
+        return false
+      }
+
+      if (await checkPort(WHISPER_PORT)) {
+        console.log('[Whisper] Server ready!')
+        state.running = true
+        state.enabled = true
+        return true
+      }
+    }
+
+    console.log('[Whisper] Timeout waiting for server')
+    // Reporting failure while leaving the child alive would orphan it (it
+    // is not yet on the port, so the port cleanup of a retry cannot find it).
+    proc.kill()
+    state.process = null
+    return false
+
+  } catch (err: unknown) {
+    console.error('[Whisper] Failed to start:', err instanceof Error ? err.message : err)
+    state.process = null
+    return false
+  }
+}
+
+/**
+ * Reports whether anything is in the Listen state on the given local TCP port.
+ *
+ * Runs a PowerShell one-liner that exits 0 when a listener exists and 1
+ * otherwise. If the probe itself cannot be run, the port counts as not
+ * listening.
+ */
+async function checkPort(port: number): Promise<boolean> {
+  const probe = `if (Get-NetTCPConnection -LocalPort ${port} -State Listen -ErrorAction SilentlyContinue) { exit 0 } else { exit 1 }`
+
+  try {
+    const child = Bun.spawn(['powershell', '-Command', probe], {
+      stdout: 'ignore',
+      stderr: 'ignore'
+    })
+    const status = await child.exited
+    return status === 0
+  } catch {
+    return false
+  }
+}
+
+/**
+ * Stop the Whisper server.
+ *
+ * When this module holds a handle to the child process, that process is
+ * killed directly. When the server is only known from a port probe
+ * (getWhisperState set the flags but there is no handle), the flags are
+ * cleared and whatever owns the Whisper port is killed in the background,
+ * since this function is synchronous.
+ *
+ * @returns true when nothing was running or a stop was issued; false if
+ *   killing the tracked process threw.
+ */
+export function stopWhisperServer(): boolean {
+  const tracked = state.process
+
+  if (!tracked) {
+    if (!state.running) {
+      console.log('[Whisper] No server running')
+      return true
+    }
+
+    // Flags say running but there is no handle: the server was adopted by a
+    // port probe, so the only way to stop it is by port.
+    console.log('[Whisper] Stopping untracked server on port', WHISPER_PORT)
+    state.running = false
+    state.enabled = false
+    // killProcessOnPort swallows its own errors, so this promise cannot reject
+    void killProcessOnPort(WHISPER_PORT)
+    return true
+  }
+
+  console.log('[Whisper] Stopping server...')
+
+  try {
+    tracked.kill()
+    state.process = null
+    state.running = false
+    state.enabled = false
+    console.log('[Whisper] Server stopped')
+    return true
+  } catch (err) {
+    console.error('[Whisper] Error stopping server:', err)
+    return false
+  }
+}
+
+/**
+ * Flip the Whisper server: stop it when it is enabled and running,
+ * otherwise try to start it.
+ *
+ * @returns `success` is the outcome of the stop/start call; `enabled` is
+ *   false after a stop and mirrors `success` after a start.
+ */
+export async function toggleWhisperServer(): Promise<{ enabled: boolean; success: boolean }> {
+  const currentlyOn = state.enabled && state.running
+
+  if (!currentlyOn) {
+    const started = await startWhisperServer()
+    return { enabled: started, success: started }
+  }
+
+  return { enabled: false, success: stopWhisperServer() }
+}
+
+/**
+ * Snapshot of the Whisper state, reconciled against a live port probe.
+ *
+ * If the probe disagrees with the `running` flag, `running` and `enabled`
+ * are both set to the probe result; when the port turned out to be closed
+ * the tracked process handle is dropped as well.
+ */
+export async function getWhisperState(): Promise<{
+  enabled: boolean
+  running: boolean
+  port: number
+  model: string
+  device: string
+}> {
+  const listening = await checkPort(WHISPER_PORT)
+
+  // Only touch the flags when they disagree with what the probe saw
+  if (listening !== state.running) {
+    state.running = listening
+    state.enabled = listening
+    if (!listening) {
+      state.process = null
+    }
+  }
+
+  const { enabled, running, model, device } = state
+  return { enabled, running, port: WHISPER_PORT, model, device }
+}
+
+/**
+ * True only when both the `enabled` and `running` flags are set.
+ * Reads the cached flags; does not probe the port.
+ */
+export function isWhisperEnabled(): boolean {
+  const { enabled, running } = state
+  return enabled && running
+}
+
+// WebSocket server for Whisper (proxies to Python server or handles directly)
+// NOTE(review): never assigned or read in the visible part of this file —
+// confirm whether this is a placeholder for planned work or can be removed.
+let whisperWsServer: any = null
+
+/**
+ * Port the Whisper server is checked on.
+ *
+ * @returns the fixed WHISPER_PORT constant; no probing is performed.
+ */
+export function getWhisperPort(): number {
+  return WHISPER_PORT
+}