chore: Remove dev-dist from git tracking

fix: Improve Whisper server startup with async polling and reduce logs
- Make server startup async to avoid Bun's 10s timeout
- Add frontend polling to detect when server is ready
- Use PowerShell Get-NetTCPConnection for reliable port detection
- Add starting state to prevent multiple simultaneous starts
- Reduce verbose logging, keep only essential info
- Add dev-dist and nul to gitignore
2026-02-14 01:03:16 -06:00 · 2026-02-14 01:03:02 -06:00 · 2026-02-14 00:28:26 -06:00 · 2026-02-14 00:16:01 -06:00 · 2026-02-13 23:47:52 -06:00
20 changed files with 1315 additions and 8080 deletions
--- a/.claude/settings.local.json
+++ b/.claude/settings.local.json
@@ -50,7 +50,9 @@
      "mcp__agent-ui__localhost_4100-notificar",
      "mcp__agent-ui__localhost_4100-enviar_al_panel",
      "mcp__agent-ui__localhost_4100-render_html",
-      "mcp__agent-ui__localhost_4100-load_vue_component"
+      "mcp__agent-ui__localhost_4100-load_vue_component",
+      "mcp__agent-ui__localhost_4100-page_refresh",
+      "WebFetch(domain:docs.anthropic.com)"
    ]
  },
  "enableAllProjectMcpServers": true,
--- a/.gitignore
+++ b/.gitignore
@@ -3,3 +3,5 @@ frontend/node_modules/
 .env
 *.log
 dist/
+frontend/dev-dist/
+nul
--- a/frontend/dev-dist/registerSW.js
+++ b/frontend/dev-dist/registerSW.js
@@ -1 +0,0 @@
-if('serviceWorker' in navigator) navigator.serviceWorker.register('/dev-sw.js?dev-sw', { scope: '/', type: 'classic' })
--- a/frontend/dev-dist/suppress-warnings.js
+++ b/frontend/dev-dist/suppress-warnings.js
--- a/frontend/dev-dist/sw.js
+++ b/frontend/dev-dist/sw.js
@@ -1,94 +0,0 @@
-/**
- * Copyright 2018 Google Inc. All Rights Reserved.
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *     http://www.apache.org/licenses/LICENSE-2.0
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-// If the loader is already loaded, just stop.
-if (!self.define) {
-  let registry = {};
-
-  // Used for `eval` and `importScripts` where we can't get script URL by other means.
-  // In both cases, it's safe to use a global var because those functions are synchronous.
-  let nextDefineUri;
-
-  const singleRequire = (uri, parentUri) => {
-    uri = new URL(uri + ".js", parentUri).href;
-    return registry[uri] || (
-      
-        new Promise(resolve => {
-          if ("document" in self) {
-            const script = document.createElement("script");
-            script.src = uri;
-            script.onload = resolve;
-            document.head.appendChild(script);
-          } else {
-            nextDefineUri = uri;
-            importScripts(uri);
-            resolve();
-          }
-        })
-      
-      .then(() => {
-        let promise = registry[uri];
-        if (!promise) {
-          throw new Error(`Module ${uri} didn’t register its module`);
-        }
-        return promise;
-      })
-    );
-  };
-
-  self.define = (depsNames, factory) => {
-    const uri = nextDefineUri || ("document" in self ? document.currentScript.src : "") || location.href;
-    if (registry[uri]) {
-      // Module is already loading or loaded.
-      return;
-    }
-    let exports = {};
-    const require = depUri => singleRequire(depUri, uri);
-    const specialDeps = {
-      module: { uri },
-      exports,
-      require
-    };
-    registry[uri] = Promise.all(depsNames.map(
-      depName => specialDeps[depName] || require(depName)
-    )).then(deps => {
-      factory(...deps);
-      return exports;
-    });
-  };
-}
-define(['./workbox-5a5d9309'], (function (workbox) { 'use strict';
-
-  self.skipWaiting();
-  workbox.clientsClaim();
-
-  /**
-   * The precacheAndRoute() method efficiently caches and responds to
-   * requests for URLs in the manifest.
-   * See https://goo.gl/S9QRab
-   */
-  workbox.precacheAndRoute([{
-    "url": "suppress-warnings.js",
-    "revision": "d41d8cd98f00b204e9800998ecf8427e"
-  }, {
-    "url": "index.html",
-    "revision": "0.24e3u5ntq78"
-  }], {});
-  workbox.cleanupOutdatedCaches();
-  workbox.registerRoute(new workbox.NavigationRoute(workbox.createHandlerBoundToURL("index.html"), {
-    allowlist: [/^\/$/],
-    denylist: [/^\/api\//]
-  }));
-
-}));
-//# sourceMappingURL=sw.js.map
--- a/frontend/dev-dist/sw.js.map
+++ b/frontend/dev-dist/sw.js.map
--- a/frontend/dev-dist/workbox-5a5d9309.js
+++ b/frontend/dev-dist/workbox-5a5d9309.js
--- a/frontend/dev-dist/workbox-5a5d9309.js.map
+++ b/frontend/dev-dist/workbox-5a5d9309.js.map
--- a/frontend/dev-dist/workbox-c5fd805d.js
+++ b/frontend/dev-dist/workbox-c5fd805d.js
--- a/frontend/dev-dist/workbox-c5fd805d.js.map
+++ b/frontend/dev-dist/workbox-c5fd805d.js.map
--- a/frontend/src/components/FloatingTerminal.vue
+++ b/frontend/src/components/FloatingTerminal.vue
@@ -250,13 +250,38 @@ function initTerminal() {
    }
  })

-  // Capture Ctrl+E even when terminal has focus
+  // Capture Ctrl+E and Ctrl+V when terminal has focus
  terminal.attachCustomKeyEventHandler((e) => {
+    // Ctrl+E: Toggle terminal
    if (e.ctrlKey && e.key === 'e') {
      e.preventDefault()
      toggleTerminal()
-      return false // Prevent terminal from processing
+      return false
    }
+
+    // Ctrl+V: Paste from clipboard
+    if (e.ctrlKey && e.key === 'v' && e.type === 'keydown') {
+      e.preventDefault()
+      navigator.clipboard.readText().then((text) => {
+        if (text && socket && socket.readyState === WebSocket.OPEN) {
+          socket.send(JSON.stringify({ type: 'input', data: text }))
+        }
+      }).catch((err) => {
+        console.error('[Terminal] Clipboard read failed:', err)
+      })
+      return false
+    }
+
+    // Ctrl+C: Copy selection (if any)
+    if (e.ctrlKey && e.key === 'c' && e.type === 'keydown') {
+      const selection = terminal?.getSelection()
+      if (selection) {
+        navigator.clipboard.writeText(selection).catch(console.error)
+        return false
+      }
+      // If no selection, let Ctrl+C pass through as SIGINT
+    }
+
    return true // Let terminal handle other keys
  })
 }
--- a/frontend/src/components/FloatingVoice.vue
+++ b/frontend/src/components/FloatingVoice.vue
@@ -23,6 +23,11 @@ const transcript = ref('')
 const interimTranscript = ref('')
 const error = ref('')

+// Typing animation state
+const animatedTranscript = ref('')
+let typingTimeout: number | null = null
+let lastAnimatedLength = 0
+
 // Position and drag state
 const position = ref({ x: 0, y: 0 })
 const hasCustomPosition = ref(false)
@@ -30,18 +35,32 @@ const isDragging = ref(false)
 const dragOffset = ref({ x: 0, y: 0 })
 const containerRef = ref<HTMLElement | null>(null)

-// Speech recognition
+// Speech recognition (Web Speech API)
 let recognition: SpeechRecognition | null = null

-// WebSocket connection (own session)
+// WebSocket connection to terminal
 const WS_URL = `ws://${window.location.hostname}:4103`
 let socket: WebSocket | null = null
 const connected = ref(false)

-// Push-to-talk state (Ctrl+S)
+// Push-to-talk state (Ctrl+Space)
 let keyDownTime = 0
 let holdTimeout: number | null = null
 const isPushToTalk = ref(false)
+let pendingWhisperSend = false // Flag to send transcript when Whisper responds
+
+// ============ WHISPER MODE ============
+const useWhisper = ref(false)
+const whisperReady = ref(false)
+const whisperLoading = ref(false)
+const WHISPER_WS_URL = `ws://${window.location.hostname}:4104`
+let whisperSocket: WebSocket | null = null
+let mediaRecorder: MediaRecorder | null = null
+let audioChunks: Blob[] = []
+let lastTranscriptLength = 0 // Track length of last transcription to show only new text
+let chunkInterval: number | null = null
+const CHUNK_INTERVAL_MS = 3000 // Send audio every 3 seconds
+let mediaStream: MediaStream | null = null

 const displayText = computed(() => {
  if (interimTranscript.value) {
@@ -73,7 +92,7 @@ function initRecognition() {
  const rec = new SpeechRecognition()
  rec.continuous = true
  rec.interimResults = true
-  rec.lang = 'es-ES'
+  rec.lang = 'es-419' // Latin American Spanish (better for accents)

  rec.onresult = (event: SpeechRecognitionEvent) => {
    let interim = ''
@@ -105,7 +124,7 @@ function initRecognition() {
  }

  rec.onend = () => {
-    if (isRecording.value) {
+    if (isRecording.value && !useWhisper.value) {
      // Restart if still recording (browser stops after silence)
      rec.start()
    }
@@ -114,6 +133,307 @@ function initRecognition() {
  return rec
 }

+// ============ WHISPER FUNCTIONS ============
+
+async function checkWhisperStatus(updateLoading = true) {
+  try {
+    const res = await fetch(`http://${window.location.hostname}:4100/api/whisper/status`)
+    const data = await res.json()
+    useWhisper.value = data.enabled
+    whisperReady.value = data.running
+    if (updateLoading) {
+      whisperLoading.value = data.starting || false
+    }
+    return data
+  } catch {
+    useWhisper.value = false
+    whisperReady.value = false
+    if (updateLoading) {
+      whisperLoading.value = false
+    }
+    return null
+  }
+}
+
+async function toggleWhisperMode() {
+  // Prevent multiple clicks
+  if (whisperLoading.value) {
+    console.log('[Voice] Toggle already in progress, ignoring')
+    return
+  }
+
+  whisperLoading.value = true
+  error.value = ''
+
+  // Show immediate feedback
+  if (!useWhisper.value) {
+    canvasStore.showNotification('Starting Whisper GPU server...', 'info', 10000)
+  }
+
+  try {
+    const res = await fetch(`http://${window.location.hostname}:4100/api/whisper/toggle`, {
+      method: 'POST'
+    })
+    const data = await res.json()
+
+    // Server is starting - poll until ready
+    if (data.starting) {
+      console.log('[Voice] Server starting, polling for status...')
+      await pollWhisperStatus()
+      return
+    }
+
+    useWhisper.value = data.enabled
+    whisperReady.value = data.running
+
+    if (data.enabled) {
+      canvasStore.showNotification('Whisper GPU ready!', 'success')
+      connectWhisperSocket()
+    } else {
+      canvasStore.showNotification('Using Web Speech API', 'info')
+      disconnectWhisperSocket()
+    }
+  } catch (e: any) {
+    error.value = 'Failed to toggle Whisper'
+    canvasStore.showNotification('Error starting Whisper server', 'error')
+    console.error('[Voice] Whisper toggle error:', e)
+  } finally {
+    whisperLoading.value = false
+  }
+}
+
+// Poll server status until ready or failed
+async function pollWhisperStatus() {
+  const maxAttempts = 60  // 2 minutes max
+  let attempts = 0
+
+  while (attempts < maxAttempts) {
+    await new Promise(resolve => setTimeout(resolve, 2000))
+    attempts++
+
+    try {
+      const status = await checkWhisperStatus(false)  // Don't update loading state
+
+      if (!status) {
+        console.log('[Voice] Failed to get status')
+        continue
+      }
+
+      // Still starting
+      if (status.starting) {
+        console.log(`[Voice] Still starting... (${attempts * 2}s)`)
+        continue
+      }
+
+      // Started successfully
+      if (status.running && status.enabled) {
+        console.log('[Voice] Server ready!')
+        canvasStore.showNotification('Whisper GPU ready!', 'success')
+        connectWhisperSocket()
+        whisperLoading.value = false
+        return
+      }
+
+      // Failed to start
+      console.log('[Voice] Server failed to start')
+      canvasStore.showNotification('Whisper server failed to start', 'error')
+      whisperLoading.value = false
+      return
+
+    } catch (e) {
+      console.error('[Voice] Polling error:', e)
+    }
+  }
+
+  // Timeout
+  canvasStore.showNotification('Whisper server timeout', 'error')
+  whisperLoading.value = false
+}
+
+function connectWhisperSocket() {
+  if (whisperSocket?.readyState === WebSocket.OPEN) return
+
+  console.log('[Voice] Connecting to Whisper server...')
+  whisperSocket = new WebSocket(WHISPER_WS_URL)
+
+  whisperSocket.onopen = () => {
+    console.log('[Voice] Whisper WebSocket connected')
+    whisperReady.value = true
+  }
+
+  whisperSocket.onmessage = (event) => {
+    try {
+      const msg = JSON.parse(event.data)
+
+      if (msg.type === 'ready') {
+        console.log('[Voice] Whisper ready:', msg.model, msg.device)
+        whisperReady.value = true
+      } else if (msg.type === 'transcription') {
+        if (msg.success && msg.text) {
+          const fullText = msg.text.trim()
+
+          if (msg.partial) {
+            // For partial results, show full accumulated transcription
+            transcript.value = fullText + ' '
+            interimTranscript.value = ''
+            console.log(`[Voice] 🔄 WHISPER partial:`, fullText)
+          } else {
+            // Final result
+            transcript.value = fullText + ' '
+            interimTranscript.value = ''
+            console.log(`[Voice] 🎯 WHISPER-GPU (${msg.model}/${msg.device}):`, fullText)
+
+            // Auto-send if push-to-talk was waiting for this
+            if (pendingWhisperSend) {
+              pendingWhisperSend = false
+              console.log('[Voice] Whisper response received, sending transcript')
+              if (transcript.value.trim()) {
+                sendTranscriptAndClose()
+              } else {
+                isPushToTalk.value = false
+                close()
+              }
+            }
+          }
+
+          // Update last transcript length for next partial
+          lastTranscriptLength = fullText.length
+        } else if (msg.error) {
+          error.value = msg.error
+          console.error('[Voice] Whisper error:', msg.error)
+          // Clear pending send on error
+          if (pendingWhisperSend) {
+            pendingWhisperSend = false
+            isPushToTalk.value = false
+          }
+        }
+      }
+    } catch (e) {
+      console.error('[Voice] Whisper message error:', e)
+    }
+  }
+
+  whisperSocket.onclose = () => {
+    console.log('[Voice] Whisper WebSocket closed')
+    whisperReady.value = false
+  }
+
+  whisperSocket.onerror = (e) => {
+    console.error('[Voice] Whisper WebSocket error:', e)
+    whisperReady.value = false
+  }
+}
+
+function disconnectWhisperSocket() {
+  if (whisperSocket) {
+    whisperSocket.close()
+    whisperSocket = null
+  }
+  whisperReady.value = false
+}
+
+async function startWhisperRecording() {
+  try {
+    mediaStream = await navigator.mediaDevices.getUserMedia({ audio: true })
+
+    mediaRecorder = new MediaRecorder(mediaStream, {
+      mimeType: 'audio/webm;codecs=opus'
+    })
+
+    audioChunks = []
+
+    mediaRecorder.ondataavailable = (event) => {
+      if (event.data.size > 0) {
+        audioChunks.push(event.data)
+      }
+    }
+
+    // Reset state for new recording
+    audioChunks = []
+    lastTranscriptLength = 0
+
+    // Start recording
+    mediaRecorder.start(100) // Collect data every 100ms
+    isRecording.value = true
+
+    // Send chunks periodically for progressive transcription
+    chunkInterval = window.setInterval(() => {
+      if (audioChunks.length > 0 && whisperSocket?.readyState === WebSocket.OPEN) {
+        sendAudioChunk(false) // false = partial, don't clear
+      }
+    }, CHUNK_INTERVAL_MS)
+
+  } catch (e: any) {
+    error.value = `Microphone error: ${e.message}`
+    console.error('[Voice] Microphone error:', e)
+  }
+}
+
+function sendAudioChunk(isFinal: boolean) {
+  if (audioChunks.length === 0) return
+
+  // Always send ALL accumulated audio (webm needs header from first chunk)
+  const audioBlob = new Blob(audioChunks, { type: 'audio/webm' })
+  const chunkCount = audioChunks.length
+
+  // Skip if audio is too small (< 5KB) - WebM header alone is ~1-2KB
+  if (audioBlob.size < 5000) {
+    console.log(`[Voice] Skipping small chunk (${audioBlob.size} bytes)`)
+    if (isFinal) {
+      audioChunks = []
+    }
+    return
+  }
+
+  // Clear chunks only if final
+  if (isFinal) {
+    audioChunks = []
+    lastTranscriptLength = 0
+  }
+
+  const reader = new FileReader()
+  reader.onloadend = () => {
+    const base64 = (reader.result as string).split(',')[1]
+
+    if (whisperSocket?.readyState === WebSocket.OPEN) {
+      whisperSocket.send(JSON.stringify({
+        type: 'transcribe',
+        audio: base64,
+        language: 'es',
+        partial: !isFinal
+      }))
+      console.log(`[Voice] Sent ${isFinal ? 'FINAL' : 'partial'} audio (${chunkCount} chunks, ${audioBlob.size} bytes)`)
+    }
+  }
+  reader.readAsDataURL(audioBlob)
+}
+
+function stopWhisperRecording() {
+  // Clear the chunk interval
+  if (chunkInterval) {
+    clearInterval(chunkInterval)
+    chunkInterval = null
+  }
+
+  // Send final chunk
+  if (audioChunks.length > 0) {
+    sendAudioChunk(true) // true = final
+  }
+
+  // Stop recorder
+  if (mediaRecorder && mediaRecorder.state !== 'inactive') {
+    mediaRecorder.stop()
+  }
+
+  // Stop media stream
+  if (mediaStream) {
+    mediaStream.getTracks().forEach(track => track.stop())
+    mediaStream = null
+  }
+
+  isRecording.value = false
+}
+
 function toggleRecording() {
  if (isRecording.value) {
    stopRecording()
@@ -124,30 +444,47 @@ function toggleRecording() {

 function startRecording() {
  error.value = ''
-  if (!recognition) {
-    recognition = initRecognition()
-  }
-  if (recognition) {
-    try {
-      recognition.start()
-      isRecording.value = true
-    } catch (e) {
-      console.error('[Voice] Failed to start:', e)
+
+  if (useWhisper.value && whisperReady.value) {
+    // Use Whisper GPU mode
+    startWhisperRecording()
+  } else {
+    // Use Web Speech API
+    if (!recognition) {
+      recognition = initRecognition()
+    }
+    if (recognition) {
+      try {
+        recognition.start()
+        isRecording.value = true
+      } catch (e) {
+        console.error('[Voice] Failed to start:', e)
+      }
    }
  }
 }

 function stopRecording() {
-  if (recognition) {
-    recognition.stop()
+  if (useWhisper.value) {
+    stopWhisperRecording()
+  } else {
+    if (recognition) {
+      recognition.stop()
+    }
+    isRecording.value = false
  }
-  isRecording.value = false
  interimTranscript.value = ''
 }

 function clearTranscript() {
  transcript.value = ''
  interimTranscript.value = ''
+  animatedTranscript.value = ''
+  lastAnimatedLength = 0
+  if (typingTimeout) {
+    clearTimeout(typingTimeout)
+    typingTimeout = null
+  }
 }

 function connectSocket() {
@@ -209,6 +546,7 @@ function sendTranscript() {

 function close() {
  stopRecording()
+  clearTranscript()
  isOpen.value = false
 }

@@ -290,21 +628,31 @@ function handleKeyUp(e: KeyboardEvent) {
      holdTimeout = null
    }

-    // If was push-to-talk recording, stop and send after 1200ms
+    // If was push-to-talk recording, continue recording for 1.5s buffer then stop
    if (isPushToTalk.value && isRecording.value) {
-      console.log('[Voice] Stopping recording, will send in 1200ms')
-      stopRecording()
+      console.log('[Voice] Key released, continuing recording for 1.5s buffer...')
+
+      // Keep recording for 1.5s more (UX buffer for trailing words)
      setTimeout(() => {
-        console.log('[Voice] Sending transcript:', transcript.value.trim())
-        console.log('[Voice] Socket state:', socket?.readyState)
-        if (transcript.value.trim()) {
-          sendTranscriptAndClose()
+        console.log('[Voice] Buffer complete, stopping recording')
+        stopRecording()
+
+        if (useWhisper.value) {
+          // For Whisper: wait for server response (handled in onmessage)
+          console.log('[Voice] Waiting for Whisper transcription...')
+          pendingWhisperSend = true
        } else {
-          // No transcript, just close
-          isPushToTalk.value = false
-          close()
+          // For Web Speech API: send after short delay for final results
+          setTimeout(() => {
+            if (transcript.value.trim()) {
+              sendTranscriptAndClose()
+            } else {
+              isPushToTalk.value = false
+              close()
+            }
+          }, 300)
        }
-      }, 1200)
+      }, 1500)
    }

    keyDownTime = 0
@@ -349,17 +697,74 @@ function sendTranscriptAndClose() {
  typeChar()
 }

-onMounted(() => {
+// Typing animation effect
+function animateTyping(targetText: string) {
+  // Clear any pending animation
+  if (typingTimeout) {
+    clearTimeout(typingTimeout)
+    typingTimeout = null
+  }
+
+  // If new text is shorter, just set it (user cleared or correction)
+  if (targetText.length < animatedTranscript.value.length) {
+    animatedTranscript.value = targetText
+    lastAnimatedLength = targetText.length
+    return
+  }
+
+  // Start from where we left off
+  const startIndex = lastAnimatedLength
+
+  // Type remaining characters one by one
+  function typeNext(index: number) {
+    if (index <= targetText.length) {
+      animatedTranscript.value = targetText.substring(0, index)
+      lastAnimatedLength = index
+
+      if (index < targetText.length) {
+        // Faster typing speed: 15-25ms per character
+        const delay = 15 + Math.random() * 10
+        typingTimeout = window.setTimeout(() => typeNext(index + 1), delay)
+      }
+    }
+  }
+
+  typeNext(startIndex)
+}
+
+// Watch transcript changes for typing animation
+watch(transcript, (newVal) => {
+  animateTyping(newVal)
+})
+
+onMounted(async () => {
  recognition = initRecognition()
  // Use capture phase to intercept before terminal or other elements
  document.addEventListener('keydown', handleKeyDown, { capture: true })
  document.addEventListener('keyup', handleKeyUp, { capture: true })
+
+  // Check Whisper status on mount
+  const status = await checkWhisperStatus()
+
+  // If server is starting (page was reloaded during startup), continue polling
+  if (status?.starting) {
+    console.log('[Voice] Server is starting, resuming polling...')
+    pollWhisperStatus()
+  } else if (useWhisper.value) {
+    connectWhisperSocket()
+  }
 })

 onBeforeUnmount(() => {
  stopRecording()
  recognition = null
  disconnectSocket()
+  disconnectWhisperSocket()
+  if (chunkInterval) clearInterval(chunkInterval)
+  if (typingTimeout) clearTimeout(typingTimeout)
+  if (mediaStream) {
+    mediaStream.getTracks().forEach(track => track.stop())
+  }
  document.removeEventListener('keydown', handleKeyDown, { capture: true })
  document.removeEventListener('keyup', handleKeyUp, { capture: true })
  document.removeEventListener('mousemove', onDrag)
@@ -408,8 +813,24 @@ defineExpose({
              </svg>
              <span>Voice</span>
              <i class="dot" :class="{ recording: isRecording, ptt: isPushToTalk }"></i>
+              <span class="mode-badge" :class="{ gpu: useWhisper }">
+                {{ useWhisper ? 'GPU' : 'Web' }}
+              </span>
            </div>
            <div class="window-controls">
+              <button
+                class="whisper-toggle"
+                :class="{ active: useWhisper, loading: whisperLoading }"
+                :disabled="whisperLoading"
+                @click.stop="toggleWhisperMode"
+                :title="whisperLoading ? 'Starting Whisper server...' : (useWhisper ? 'Using Whisper GPU - Click to use Web Speech' : 'Using Web Speech - Click to use Whisper GPU')"
+              >
+                <svg width="10" height="10" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2">
+                  <rect x="4" y="4" width="16" height="16" rx="2"/>
+                  <line x1="9" y1="9" x2="9" y2="15"/>
+                  <line x1="15" y1="9" x2="15" y2="15"/>
+                </svg>
+              </button>
              <button class="x" @click="close" title="Close">
                <svg width="8" height="8" viewBox="0 0 10 10">
                  <line x1="0" y1="0" x2="10" y2="10" stroke="currentColor" stroke-width="1.5"/>
@@ -421,10 +842,10 @@ defineExpose({

          <!-- Content -->
          <div class="content">
-            <div class="transcript" :class="{ empty: !transcript && !interimTranscript }">
-              <span class="final">{{ transcript }}</span>
+            <div class="transcript" :class="{ empty: !animatedTranscript && !interimTranscript }">
+              <span class="final">{{ animatedTranscript }}</span><span class="cursor" v-if="animatedTranscript && animatedTranscript.length < transcript.length">|</span>
              <span class="interim">{{ interimTranscript }}</span>
-              <span v-if="!transcript && !interimTranscript" class="placeholder">
+              <span v-if="!animatedTranscript && !interimTranscript" class="placeholder">
                Presiona el micrófono o mantén Ctrl+Space...
              </span>
            </div>
@@ -545,6 +966,58 @@ defineExpose({
  box-shadow: 0 0 6px #f90;
 }

+.mode-badge {
+  font-size: 8px;
+  padding: 1px 4px;
+  border-radius: 3px;
+  background: rgba(0, 0, 0, 0.2);
+  color: #555;
+  font-weight: 600;
+  text-transform: uppercase;
+}
+
+.mode-badge.gpu {
+  background: linear-gradient(135deg, #10b981, #059669);
+  color: #fff;
+  box-shadow: 0 0 4px rgba(16, 185, 129, 0.5);
+}
+
+.whisper-toggle {
+  width: 20px;
+  height: 18px;
+  display: flex;
+  align-items: center;
+  justify-content: center;
+  background: rgba(255, 255, 255, 0.3);
+  border: 1px solid rgba(0, 0, 0, 0.1);
+  border-radius: 3px;
+  color: #666;
+  cursor: pointer;
+  transition: all 0.15s;
+}
+
+.whisper-toggle:hover:not(:disabled) {
+  background: rgba(255, 255, 255, 0.5);
+}
+
+.whisper-toggle:disabled {
+  cursor: not-allowed;
+  opacity: 0.6;
+}
+
+.whisper-toggle.active {
+  background: linear-gradient(180deg, #10b981 0%, #059669 100%);
+  border-color: #047857;
+  color: #fff;
+}
+
+.whisper-toggle.loading {
+  animation: pulse 0.6s infinite;
+  background: linear-gradient(180deg, #f59e0b 0%, #d97706 100%);
+  border-color: #b45309;
+  color: #fff;
+}
+
@keyframes pulse {
  0%, 100% { opacity: 1; }
  50% { opacity: 0.5; }
@@ -601,6 +1074,17 @@ defineExpose({
  font-style: italic;
 }

+.transcript .cursor {
+  color: #4a9;
+  font-weight: bold;
+  animation: blink 0.6s infinite;
+}
+
+@keyframes blink {
+  0%, 50% { opacity: 1; }
+  51%, 100% { opacity: 0; }
+}
+
 .transcript .placeholder {
  color: #888;
 }
--- a/frontend/src/services/tools/handlers/globalHandlers.ts
+++ b/frontend/src/services/tools/handlers/globalHandlers.ts
@@ -223,6 +223,112 @@ export function createGlobalHandlers(callbacks: ToolManagementCallbacks): ToolCo
        }, 100)
        return 'Recargando pagina...'
      }
+    },
+    {
+      name: 'whisper_status',
+      description: 'Obtiene el estado del servidor Whisper GPU para speech-to-text.',
+      category: 'global',
+      schema: {
+        type: 'object',
+        properties: {}
+      },
+      handler: async () => {
+        try {
+          const res = await fetch(`http://${window.location.hostname}:4100/api/whisper/status`)
+          const data = await res.json()
+          return `Whisper GPU Status:\n` +
+            `  Enabled: ${data.enabled ? 'Yes' : 'No'}\n` +
+            `  Running: ${data.running ? 'Yes' : 'No'}\n` +
+            `  Model: ${data.model}\n` +
+            `  Device: ${data.device}\n` +
+            `  Port: ${data.port}`
+        } catch (e: any) {
+          return `Error checking Whisper status: ${e.message}`
+        }
+      }
+    },
+    {
+      name: 'whisper_toggle',
+      description: 'Activa o desactiva Whisper GPU para speech-to-text. Cuando esta activo usa la GPU para transcribir voz con mejor precision para acentos latinos.',
+      category: 'global',
+      schema: {
+        type: 'object',
+        properties: {}
+      },
+      handler: async () => {
+        try {
+          const res = await fetch(`http://${window.location.hostname}:4100/api/whisper/toggle`, {
+            method: 'POST'
+          })
+          const data = await res.json()
+
+          if (data.enabled) {
+            return `Whisper GPU ENABLED\n` +
+              `  Model: ${data.model}\n` +
+              `  Device: ${data.device}\n` +
+              `  Port: ws://localhost:${data.port}\n\n` +
+              `Voice input will now use GPU-accelerated transcription.`
+          } else {
+            return `Whisper GPU DISABLED\n\n` +
+              `Voice input will use Web Speech API (browser native).`
+          }
+        } catch (e: any) {
+          return `Error toggling Whisper: ${e.message}`
+        }
+      }
+    },
+    {
+      name: 'whisper_start',
+      description: 'Inicia el servidor Whisper GPU si no esta corriendo.',
+      category: 'global',
+      schema: {
+        type: 'object',
+        properties: {}
+      },
+      handler: async () => {
+        try {
+          const res = await fetch(`http://${window.location.hostname}:4100/api/whisper/start`, {
+            method: 'POST'
+          })
+          const data = await res.json()
+
+          if (data.success) {
+            return `Whisper server started!\n` +
+              `  Model: ${data.model}\n` +
+              `  Device: ${data.device}\n` +
+              `  Ready: ${data.running ? 'Yes' : 'Loading...'}`
+          } else {
+            return `Failed to start Whisper server: ${data.message}`
+          }
+        } catch (e: any) {
+          return `Error starting Whisper: ${e.message}`
+        }
+      }
+    },
+    {
+      name: 'whisper_stop',
+      description: 'Detiene el servidor Whisper GPU para liberar memoria de la GPU.',
+      category: 'global',
+      schema: {
+        type: 'object',
+        properties: {}
+      },
+      handler: async () => {
+        try {
+          const res = await fetch(`http://${window.location.hostname}:4100/api/whisper/stop`, {
+            method: 'POST'
+          })
+          const data = await res.json()
+
+          if (data.success) {
+            return `Whisper server stopped. GPU memory released.`
+          } else {
+            return `Failed to stop Whisper server: ${data.message}`
+          }
+        } catch (e: any) {
+          return `Error stopping Whisper: ${e.message}`
+        }
+      }
    }
  ]
 }
--- a/package.json
+++ b/package.json
@@ -3,7 +3,10 @@
  "version": "1.0.0",
  "description": "Dynamic canvas for Claude Code interaction",
  "scripts": {
-    "start": "concurrently -n server,frontend -c blue,green \"cd server && bun --watch run index.ts\" \"cd frontend && bun run dev --host\""
+    "start": "concurrently -n api,terminal,frontend -c blue,yellow,green \"cd server && bun --watch run index.ts\" \"cd server && bun run terminal.ts\" \"cd frontend && bun run dev --host\"",
+    "start:api": "cd server && bun --watch run index.ts",
+    "start:terminal": "cd server && bun run terminal.ts",
+    "start:frontend": "cd frontend && bun run dev --host"
  },
  "devDependencies": {
    "concurrently": "^9.2.1"
--- a/server/index.ts
+++ b/server/index.ts
@@ -1,7 +1,6 @@
 import { PORT_HTTP, WORKING_DIR } from './config'
 import { initDatabase } from './db'
 import { handleRequest } from './routes'
-import { startTerminalServer } from './services/terminal'

 // Initialize database
 initDatabase()
@@ -12,18 +11,10 @@ Bun.serve({
  fetch: handleRequest
 })

-console.log(`[HTTP] API running at http://localhost:${PORT_HTTP}`)
-
-// Start Terminal WebSocket server
-startTerminalServer()
-
 // Startup summary
 console.log('')
 console.log('='.repeat(50))
-console.log('Agent UI Server started')
+console.log('Agent UI API Server (hot-reload enabled)')
 console.log(`  API: http://localhost:${PORT_HTTP}`)
-console.log(`  Terminal: ws://localhost:4103`)
 console.log(`  Working Dir: ${WORKING_DIR}`)
-console.log('')
-console.log('WebMCP starts separately with Claude Code MCP')
 console.log('='.repeat(50))
--- a/server/routes/index.ts
+++ b/server/routes/index.ts
@@ -7,6 +7,7 @@ import { handleThemes, handleActiveTheme, handleDesignTokens, handleThemeById, h
 import { handleCanvas, handleCanvasById, handleToolbarCanvas, handleDefaultCanvas, handleCanvasComponents, handleCanvasComponentById } from './canvas'
 import { handleGiteaRepo, handleGiteaTree, handleGiteaFile } from './gitea'
 import { handleTables, handleStats, handleTableSchema, handleTableData, handleQuery } from './database'
+import { handleWhisperRoutes } from './whisper'

 export async function handleRequest(req: Request): Promise<Response> {
  const url = new URL(req.url)
@@ -168,5 +169,11 @@ export async function handleRequest(req: Request): Promise<Response> {
    return handleQuery(req)
  }

+  // Whisper (GPU speech-to-text)
+  if (path.startsWith('/api/whisper/')) {
+    const res = await handleWhisperRoutes(req)
+    if (res) return res
+  }
+
  return notFoundResponse()
 }
--- a/server/routes/whisper.ts
+++ b/server/routes/whisper.ts
@@ -0,0 +1,66 @@
+/**
+ * Whisper API routes
+ * Control the local GPU-accelerated speech-to-text server
+ */
+
+import {
+  startWhisperServer,
+  stopWhisperServer,
+  toggleWhisperServer,
+  getWhisperState,
+  getWhisperPort
+} from '../services/whisper'
+
+/**
+ * Route a request under /api/whisper/ to the matching Whisper control action.
+ *
+ * Supported routes:
+ *   GET  /api/whisper/status  - current state
+ *   POST /api/whisper/start   - start the server and wait for the outcome
+ *   POST /api/whisper/stop    - stop the tracked server process
+ *   POST /api/whisper/toggle  - stop if running, otherwise begin starting in the background
+ *   GET  /api/whisper/port    - WebSocket port and URL
+ *
+ * @param req Incoming HTTP request.
+ * @returns A JSON response, or null when no route matches so the caller can
+ *          apply its own not-found handling.
+ */
+export async function handleWhisperRoutes(req: Request): Promise<Response | null> {
+  const { pathname } = new URL(req.url)
+
+  switch (`${req.method} ${pathname}`) {
+    case 'GET /api/whisper/status':
+      return Response.json(await getWhisperState())
+
+    case 'POST /api/whisper/start': {
+      // This awaits the complete startup (including model loading). Clients
+      // that cannot wait should call /toggle and poll /status instead.
+      const success = await startWhisperServer()
+      const state = await getWhisperState()
+      return Response.json({
+        success,
+        ...state,
+        message: success ? 'Whisper server started' : 'Failed to start Whisper server'
+      })
+    }
+
+    case 'POST /api/whisper/stop': {
+      const success = stopWhisperServer()
+      const state = await getWhisperState()
+      return Response.json({
+        success,
+        ...state,
+        message: success ? 'Whisper server stopped' : 'Failed to stop Whisper server'
+      })
+    }
+
+    case 'POST /api/whisper/toggle': {
+      const result = await toggleWhisperServer()
+      const state = await getWhisperState()
+
+      // While a background start is in progress `enabled` is still false, so
+      // report "starting" instead of claiming that Whisper is disabled.
+      let message = state.enabled
+        ? 'Whisper enabled (GPU)'
+        : 'Whisper disabled (using Web Speech API)'
+      if (state.starting) {
+        message = 'Whisper starting (GPU)...'
+      }
+
+      // Fields from `state` intentionally override those from `result`
+      return Response.json({
+        ...result,
+        ...state,
+        message
+      })
+    }
+
+    case 'GET /api/whisper/port': {
+      const port = getWhisperPort()
+      return Response.json({
+        port,
+        url: `ws://localhost:${port}`
+      })
+    }
+
+    default:
+      return null
+  }
+}
--- a/server/services/whisper.ts
+++ b/server/services/whisper.ts
@@ -0,0 +1,247 @@
+/**
+ * Whisper Service - Manages the Python Whisper server process
+ * Provides GPU-accelerated speech-to-text as an alternative to Web Speech API
+ */
+
+import { join } from 'path'
+import { Subprocess } from 'bun'
+
+const WHISPER_PORT = 4104
+const WHISPER_SCRIPT = join(import.meta.dir, '..', 'whisper_server.py')
+
+// In-memory bookkeeping for the Whisper server managed by this module.
+interface WhisperState {
+  // Set and cleared together with `running` everywhere in this file
+  enabled: boolean
+  // True once the Whisper port has been seen listening
+  running: boolean
+  starting: boolean  // Prevents multiple simultaneous start attempts
+  // Handle of the Python process spawned by startWhisperServer(), if any
+  process: Subprocess | null
+  // Reported to clients and used in log output; never reassigned in this file
+  model: string
+  // Reported to clients; never reassigned in this file
+  device: string
+}
+
+// Single shared instance; every exported function reads or mutates it
+const state: WhisperState = {
+  enabled: false,
+  running: false,
+  starting: false,
+  process: null,
+  model: 'large-v3',
+  device: 'cuda'
+}
+
+/**
+ * Force-stop every process that owns a TCP connection on `port`, then pause
+ * for one second so the port can be released.
+ *
+ * Uses PowerShell (Get-NetTCPConnection / Stop-Process). Best effort: any
+ * failure is swallowed and the function still resolves.
+ */
+async function killProcessOnPort(port: number): Promise<void> {
+  const killScript =
+    `Get-NetTCPConnection -LocalPort ${port} -ErrorAction SilentlyContinue | ForEach-Object { Stop-Process -Id $_.OwningProcess -Force -ErrorAction SilentlyContinue }`
+
+  try {
+    const killer = Bun.spawn(['powershell', '-Command', killScript], {
+      stdout: 'ignore',
+      stderr: 'ignore'
+    })
+    await killer.exited
+
+    // Give the OS a moment to free the port
+    await new Promise(done => setTimeout(done, 1000))
+  } catch {
+    // Nothing to clean up if PowerShell could not be run
+  }
+}
+
+/**
+ * Start the Python Whisper server and wait until it is listening.
+ *
+ * Sequence: free the port, spawn `python -u whisper_server.py`, wait 2 s,
+ * then re-check every 3 s (up to 40 more times) that the process is still
+ * alive and whether WHISPER_PORT has a listener.
+ *
+ * @returns true if the server is (or already was) running; false if another
+ *          start is in progress, the process exited, the wait timed out, or
+ *          spawning failed.
+ */
+export async function startWhisperServer(): Promise<boolean> {
+  // Prevent multiple simultaneous start attempts
+  if (state.starting) {
+    return false
+  }
+
+  if (state.running && state.process) {
+    return true
+  }
+
+  state.starting = true
+  console.log(`[Whisper] Starting (${state.model})...`)
+
+  let spawned: Subprocess | null = null
+
+  try {
+    // Kill any existing process on the port
+    await killProcessOnPort(WHISPER_PORT)
+
+    // stdout/stderr are inherited so the Python logs appear in this console;
+    // -u and PYTHONUNBUFFERED disable Python output buffering.
+    const proc = Bun.spawn(['python', '-u', WHISPER_SCRIPT], {
+      cwd: join(import.meta.dir, '..'),
+      stdout: 'inherit',
+      stderr: 'inherit',
+      env: { ...process.env, PYTHONUNBUFFERED: '1' }
+    })
+    spawned = proc
+    state.process = proc
+
+    // One short initial wait, then 40 polls 3 s apart (about 120 s in total)
+    // to leave time for large models to load.
+    const waitsMs: number[] = [2000, ...new Array(40).fill(3000)]
+
+    for (const waitMs of waitsMs) {
+      await new Promise(resolve => setTimeout(resolve, waitMs))
+
+      // A non-null exit code means the process is gone
+      if (proc.exitCode !== null) {
+        console.error('[Whisper] Process exited with code:', proc.exitCode)
+        state.process = null
+        return false
+      }
+
+      if (await checkPort(WHISPER_PORT)) {
+        console.log('[Whisper] Ready')
+        state.running = true
+        state.enabled = true
+        return true
+      }
+    }
+
+    console.error('[Whisper] Timeout (120s)')
+    // Do not leave the unresponsive Python process running in the background
+    proc.kill()
+    state.process = null
+    return false
+
+  } catch (err: any) {
+    console.error('[Whisper] Error:', err?.message ?? err)
+    try {
+      spawned?.kill()
+    } catch {
+      // The process may already be gone
+    }
+    state.process = null
+    return false
+  } finally {
+    // Always release the guard, whichever path was taken
+    state.starting = false
+  }
+}
+
+/**
+ * Ask PowerShell whether any TCP connection on `port` is in the Listen state.
+ *
+ * @returns true when the probe prints LISTENING; false otherwise, including
+ *          when PowerShell cannot be run.
+ */
+async function checkPort(port: number): Promise<boolean> {
+  const probeScript =
+    `$c = Get-NetTCPConnection -LocalPort ${port} -State Listen -ErrorAction SilentlyContinue; if ($c) { Write-Output 'LISTENING' }`
+
+  try {
+    const probe = Bun.spawn(['powershell', '-NoProfile', '-Command', probeScript], {
+      stdout: 'pipe',
+      stderr: 'ignore'
+    })
+
+    // Drain stdout before waiting for the exit
+    const printed = await new Response(probe.stdout).text()
+    await probe.exited
+
+    return printed.trim() === 'LISTENING'
+  } catch {
+    return false
+  }
+}
+
+/**
+ * Kill the tracked Whisper process and mark the service as off.
+ *
+ * @returns true when there was nothing to stop or the process was killed;
+ *          false when killing it threw. With no tracked process the state
+ *          flags are left untouched.
+ */
+export function stopWhisperServer(): boolean {
+  const child = state.process
+
+  if (!child) {
+    return true
+  }
+
+  try {
+    child.kill()
+    Object.assign(state, { process: null, running: false, enabled: false })
+    console.log('[Whisper] Stopped')
+    return true
+  } catch (err) {
+    console.error('[Whisper] Stop error:', err)
+    return false
+  }
+}
+
+/**
+ * Flip the Whisper server: stop it when it is on, otherwise kick off a start
+ * in the background and return straight away.
+ *
+ * @returns `starting: true` when a start was launched or is already under
+ *          way (with `success: false` in the latter case); otherwise the
+ *          outcome of the stop.
+ */
+export async function toggleWhisperServer(): Promise<{ enabled: boolean; success: boolean; starting: boolean }> {
+  // A start is already in flight: refuse to toggle
+  if (state.starting) {
+    return { enabled: false, success: false, starting: true }
+  }
+
+  const isOn = state.enabled && state.running
+  if (isOn) {
+    return { enabled: false, success: stopWhisperServer(), starting: false }
+  }
+
+  // Deliberately not awaited; the frontend polls the status endpoint
+  startWhisperServer().catch(err => {
+    console.error('[Whisper] Start error:', err)
+    state.starting = false
+  })
+
+  return { enabled: false, success: true, starting: true }
+}
+
+/**
+ * Snapshot of the Whisper state for API responses.
+ *
+ * Unless a start is in progress, the port is probed first and `running`,
+ * `enabled` (and `process`, when the port is no longer listening) are
+ * brought in line with the result.
+ */
+export async function getWhisperState(): Promise<{
+  enabled: boolean
+  running: boolean
+  starting: boolean
+  port: number
+  model: string
+  device: string
+}> {
+  // Skipped during startup so the probe does not interfere with it
+  if (!state.starting) {
+    const listening = await checkPort(WHISPER_PORT)
+
+    // Only act when the cached flag disagrees with the probe
+    if (listening !== state.running) {
+      state.running = listening
+      state.enabled = listening
+      if (!listening) {
+        state.process = null
+      }
+    }
+  }
+
+  const { enabled, running, starting, model, device } = state
+  return { enabled, running, starting, port: WHISPER_PORT, model, device }
+}
+
+/**
+ * Check if Whisper is enabled
+ *
+ * @returns true only when both `state.enabled` and `state.running` are set.
+ *          Reads the cached flags; it does not probe the port.
+ */
+export function isWhisperEnabled(): boolean {
+  return state.enabled && state.running
+}
+
+// WebSocket server for Whisper (proxies to Python server or handles directly)
+// NOTE(review): never assigned or read anywhere else in this file - looks like
+// a placeholder; confirm before relying on it.
+let whisperWsServer: any = null
+
+/**
+ * @returns The fixed port (WHISPER_PORT) used for the Whisper WebSocket server.
+ */
+export function getWhisperPort(): number {
+  return WHISPER_PORT
+}
--- a/server/terminal.ts
+++ b/server/terminal.ts
@@ -0,0 +1,22 @@
+#!/usr/bin/env bun
+/**
+ * Terminal Server - Independent process
+ * This runs separately from the main server to maintain stable Claude Code sessions
+ * even when the main server restarts due to code changes.
+ */
+
+import { startTerminalServer } from './services/terminal'
+import { WORKING_DIR } from './config'
+
+// Startup banner, printed line by line before the server is started
+const rule = '='.repeat(50)
+const bannerLines = [
+  '',
+  rule,
+  'Terminal Server (Independent Process)',
+  `  WebSocket: ws://localhost:4103`,
+  `  Working Dir: ${WORKING_DIR}`,
+  '',
+  "This process is stable and won't restart",
+  'when the main server reloads.',
+  rule,
+  ''
+]
+
+for (const line of bannerLines) {
+  console.log(line)
+}
+
+startTerminalServer()
--- a/server/whisper_server.py
+++ b/server/whisper_server.py
@@ -0,0 +1,314 @@
+#!/usr/bin/env python3
+"""
+Whisper Server - GPU-accelerated speech-to-text using faster-whisper
+WebSocket server that receives audio and returns transcriptions
+"""
+
+import asyncio
+import io
+import json
+import os
+import subprocess
+import sys
+import tempfile
+import wave
+from pathlib import Path
+from typing import Optional
+
+try:
+    import websockets
+    from faster_whisper import WhisperModel
+except ImportError as e:
+    print(f"Missing dependency: {e}")
+    print("Run: pip install faster-whisper websockets")
+    sys.exit(1)
+
+
+def convert_audio_to_wav(input_data: bytes, input_format: str = "webm") -> Optional[bytes]:
+    """
+    Convert audio data to WAV (16 kHz, mono, 16-bit PCM) using ffmpeg.
+
+    Browsers typically record WebM/Opus, so the data is written to a temp
+    file, converted by ffmpeg, and read back.
+
+    Args:
+        input_data: Raw bytes of the recorded audio.
+        input_format: File extension (without dot) describing input_data.
+
+    Returns:
+        The WAV file contents, or None when ffmpeg is missing, fails,
+        times out (30 s), or any other error occurs.
+    """
+    # Create temp files for input and output
+    with tempfile.NamedTemporaryFile(suffix=f".{input_format}", delete=False) as in_file:
+        in_file.write(input_data)
+        input_path = in_file.name
+
+    # Appending the extension keeps the output distinct from the input even
+    # when input_format is "wav", and never rewrites other parts of the path.
+    output_path = input_path + ".wav"
+
+    try:
+        # Use ffmpeg to convert to WAV (16kHz mono, which Whisper prefers)
+        result = subprocess.run([
+            "ffmpeg", "-y",  # Overwrite output
+            "-i", input_path,  # Input file
+            "-ar", "16000",  # Sample rate 16kHz
+            "-ac", "1",  # Mono
+            "-c:a", "pcm_s16le",  # PCM 16-bit little-endian
+            output_path
+        ], capture_output=True, text=True, timeout=30)
+
+        if result.returncode != 0:
+            print(f"[Whisper] ffmpeg error: {result.stderr}")
+            return None
+
+        # Read the converted WAV file
+        with open(output_path, "rb") as f:
+            return f.read()
+
+    except subprocess.TimeoutExpired:
+        print("[Whisper] ffmpeg conversion timed out")
+        return None
+    except FileNotFoundError:
+        print("[Whisper] ffmpeg not found - please install ffmpeg")
+        return None
+    except Exception as e:
+        print(f"[Whisper] Conversion error: {e}")
+        return None
+    finally:
+        # Cleanup temp files; the output may not exist if ffmpeg failed
+        for path in (input_path, output_path):
+            try:
+                os.unlink(path)
+            except OSError:
+                pass
+
+# Configuration
+HOST = "localhost"  # Host passed to websockets.serve() in main()
+PORT = 4104  # Same value as WHISPER_PORT in server/services/whisper.ts, which polls this port
+MODEL_SIZE = "large-v3"  # Best standard model for Spanish
+DEVICE = "cuda"  # cuda or cpu (load_model() falls back to CPU if loading fails)
+COMPUTE_TYPE = "float16"  # float16 for GPU, int8 for CPU
+
+# Model display name (extract from path if needed)
+MODEL_NAME = MODEL_SIZE.split("/")[-1] if "/" in MODEL_SIZE else MODEL_SIZE
+
+# Spanish context prompt to improve accuracy (Honduras Spanish + tech context)
+# Passed to model.transcribe() as initial_prompt on every transcription.
+INITIAL_PROMPT = """Transcripción en español hondureño de un desarrollador de software.
+Contexto: programación, TypeScript, Vue, Python, comandos de terminal, código.
+Vocabulario técnico: servidor, frontend, backend, chunks, WebSocket, transcripción,
+componente, función, variable, API, modelo, Whisper, Claude, MCP, configuración.
+Expresiones hondureñas: vos, tenés, podés, mirá, pues, verdad, ajá, entonces.
+Diminutivos comunes: ahorita, ratito, prontito, despuesito, chiquito, tantito, poquito."""
+
+# Global model instance
+model = None  # Set by load_model(); None until a model has been loaded
+model_loading = False  # True while load_model() is constructing the model
+
+async def load_model():
+    """
+    Return the shared Whisper model, loading it on first use.
+
+    The model is first loaded with the configured DEVICE / COMPUTE_TYPE. If
+    that raises, it is loaded again on CPU with int8 and DEVICE is updated so
+    that status messages report the device actually in use.
+
+    Returns:
+        The loaded WhisperModel (or None if a concurrent load failed).
+
+    Raises:
+        Exception: whatever WhisperModel raises if the CPU fallback fails too.
+    """
+    global model, model_loading, DEVICE
+
+    if model is not None:
+        return model
+
+    if model_loading:
+        # Wait for model to finish loading
+        while model_loading:
+            await asyncio.sleep(0.1)
+        return model
+
+    model_loading = True
+    download_root = str(Path.home() / ".cache" / "whisper")
+    print(f"[Whisper] Loading model '{MODEL_NAME}' on {DEVICE}...")
+
+    try:
+        try:
+            # Load model - this downloads on first run
+            model = WhisperModel(
+                MODEL_SIZE,
+                device=DEVICE,
+                compute_type=COMPUTE_TYPE,
+                download_root=download_root
+            )
+            print("[Whisper] Model loaded successfully!")
+        except Exception as e:
+            print(f"[Whisper] Error loading model: {e}")
+            print("[Whisper] Falling back to CPU...")
+            model = WhisperModel(
+                MODEL_SIZE,
+                device="cpu",
+                compute_type="int8",
+                download_root=download_root
+            )
+            DEVICE = "cpu"
+    finally:
+        # Always clear the flag; otherwise a failed fallback would leave
+        # every later caller waiting in the loop above forever.
+        model_loading = False
+
+    return model
+
+def transcribe_audio(audio_data: bytes, language: str = "es", is_webm: bool = True) -> dict:
+    """
+    Transcribe audio data using the already-loaded Whisper model.
+
+    Args:
+        audio_data: Raw audio bytes.
+        language: Language code passed to the model.
+        is_webm: When True the data is converted from WebM to WAV first;
+            when False it is assumed to be WAV already.
+
+    Returns:
+        On success a dict with "success", "text", "language",
+        "language_probability", "duration", "segments", "engine", "model"
+        and "device". On failure a dict with a single "error" key.
+    """
+    if model is None:
+        return {"error": "Model not loaded"}
+
+    # Convert WebM to WAV if needed
+    if is_webm:
+        wav_data = convert_audio_to_wav(audio_data, "webm")
+        if wav_data is None:
+            return {"error": "Failed to convert audio format. Ensure ffmpeg is installed."}
+    else:
+        wav_data = audio_data
+
+    # Save audio to temp file (faster-whisper needs a file path)
+    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as f:
+        f.write(wav_data)
+        temp_path = f.name
+
+    try:
+        # Transcribe with optimized parameters
+        segments, info = model.transcribe(
+            temp_path,
+            language=language,
+            beam_size=5,
+            best_of=5,  # Number of candidates when sampling
+            temperature=0.0,  # Use greedy decoding (most accurate)
+            vad_filter=True,  # Voice activity detection
+            vad_parameters=dict(
+                min_silence_duration_ms=300,  # Shorter silence detection
+                speech_pad_ms=200,  # Padding around speech
+                threshold=0.5  # VAD sensitivity (lower = more sensitive)
+            ),
+            initial_prompt=INITIAL_PROMPT,  # Context for better Spanish transcription
+            condition_on_previous_text=True,  # Use context from previous segments
+            no_speech_threshold=0.6,
+            log_prob_threshold=-1.0,
+            compression_ratio_threshold=2.4,
+            word_timestamps=False  # Faster without word-level timestamps
+        )
+
+        # Collect all segments (iterating is what drives the transcription)
+        segments_list = [
+            {"start": segment.start, "end": segment.end, "text": segment.text}
+            for segment in segments
+        ]
+        text = " ".join(item["text"] for item in segments_list)
+
+        return {
+            "success": True,
+            "text": text.strip(),
+            "language": info.language,
+            "language_probability": info.language_probability,
+            "duration": info.duration,
+            "segments": segments_list,
+            "engine": "whisper-gpu",
+            "model": MODEL_NAME,
+            "device": DEVICE
+        }
+
+    except Exception as e:
+        print(f"[Whisper] Transcription error: {e}")
+        return {"error": str(e)}
+
+    finally:
+        # Cleanup temp file
+        try:
+            os.unlink(temp_path)
+        except OSError:
+            pass
+
+async def handle_client(websocket):
+    """
+    Serve one WebSocket client until it disconnects.
+
+    Protocol:
+      - On connect a {"type": "ready"} message is sent.
+      - Binary frames are treated as WebM audio and answered with a
+        {"type": "transcription", ...} message (Spanish).
+      - Text frames must be JSON objects with a "type" of "transcribe"
+        (base64 audio in "audio", optional "language" and "partial"),
+        "ping" or "status". Other types are ignored.
+      - Malformed input is answered with {"type": "error"} and the
+        connection stays open.
+    """
+    import base64
+
+    async def send_json(payload: dict) -> None:
+        await websocket.send(json.dumps(payload))
+
+    # Ensure model is loaded
+    await load_model()
+
+    # Send ready message
+    await send_json({
+        "type": "ready",
+        "model": MODEL_NAME,
+        "device": DEVICE
+    })
+
+    loop = asyncio.get_running_loop()
+
+    try:
+        async for message in websocket:
+            if isinstance(message, bytes):
+                # Binary audio data (likely WebM format from browser)
+                # Transcribe in thread pool to not block
+                result = await loop.run_in_executor(
+                    None, transcribe_audio, message, "es", True
+                )
+                await send_json({"type": "transcription", **result})
+                continue
+
+            # JSON command
+            try:
+                cmd = json.loads(message)
+            except json.JSONDecodeError:
+                await send_json({"type": "error", "message": "Invalid JSON"})
+                continue
+
+            if not isinstance(cmd, dict):
+                # Valid JSON, but not an object we can read a "type" from
+                await send_json({"type": "error", "message": "Expected a JSON object"})
+                continue
+
+            cmd_type = cmd.get("type")
+
+            if cmd_type == "transcribe":
+                # Audio data sent as base64 (WebM format from browser)
+                try:
+                    audio_data = base64.b64decode(cmd.get("audio", ""))
+                except (ValueError, TypeError):
+                    # binascii.Error is a ValueError; TypeError covers non-string payloads
+                    await send_json({"type": "error", "message": "Invalid base64 audio"})
+                    continue
+
+                language = cmd.get("language", "es")
+                is_partial = cmd.get("partial", False)
+
+                result = await loop.run_in_executor(
+                    None, transcribe_audio, audio_data, language, True
+                )
+
+                # Add partial flag to result
+                if is_partial:
+                    result["partial"] = True
+
+                await send_json({"type": "transcription", **result})
+
+            elif cmd_type == "ping":
+                await send_json({"type": "pong"})
+
+            elif cmd_type == "status":
+                await send_json({
+                    "type": "status",
+                    "model": MODEL_NAME,
+                    "device": DEVICE,
+                    "ready": model is not None
+                })
+
+    except websockets.exceptions.ConnectionClosed:
+        pass
+    except Exception as e:
+        print(f"[Whisper] Error: {e}")
+
+async def main():
+    """Load the model, then serve WebSocket clients on HOST:PORT until cancelled."""
+    print(f"[Whisper] Model: {MODEL_NAME} | Device: {DEVICE} | Port: {PORT}")
+
+    # Load before binding the socket, so a listening port means "ready"
+    await load_model()
+
+    server = websockets.serve(handle_client, HOST, PORT)
+    async with server:
+        print("[Whisper] Ready")
+        # A future that never completes keeps the server alive
+        never_done = asyncio.Future()
+        await never_done
+
+if __name__ == "__main__":
+    # No dependency check is needed here: the import guard at the top of this
+    # file already exits with install instructions when websockets or
+    # faster-whisper is missing, so a second check would be unreachable.
+    asyncio.run(main())
Author	SHA1	Message	Date
josedario87	853aea6eb5	chore: Remove dev-dist from git tracking	2026-02-14 01:03:16 -06:00
josedario87	5be0fb91ab	fix: Improve Whisper server startup with async polling and reduce logs - Make server startup async to avoid Bun's 10s timeout - Add frontend polling to detect when server is ready - Use PowerShell Get-NetTCPConnection for reliable port detection - Add starting state to prevent multiple simultaneous starts - Reduce verbose logging, keep only essential info - Add dev-dist and nul to gitignore	2026-02-14 01:03:02 -06:00
josedario87	9f1e10b8d5	feat: Add typing animation to voice transcription - Text appears letter by letter (15-25ms per character) - Blinking cursor shows while text is animating - Animation continues from last position for new chunks - Smooth visual feedback for transcription progress	2026-02-14 00:28:26 -06:00
josedario87	ac17a9f292	fix: Improve Whisper transcription with WebM to WAV conversion - Add ffmpeg conversion from WebM/Opus to WAV (16kHz mono PCM) - Optimize transcription parameters (VAD, temperature, beam_size) - Add Honduras Spanish context prompt with local expressions - Fix chunk accumulation display in voice panel - Add 1.5s recording buffer after releasing Ctrl+Space - Skip small audio chunks (<5KB) that cause ffmpeg errors - Use large-v3 model for better accuracy	2026-02-14 00:16:01 -06:00
josedario87	638e6ac8e0	feat: Add Whisper GPU speech-to-text with progressive transcription - Add faster-whisper Python server for GPU-accelerated transcription - Support dual mode: Web Speech API or Whisper GPU (toggleable) - Progressive transcription every 3 seconds while recording - Separate terminal server process (stable during hot-reload) - Add Ctrl+V paste and Ctrl+C copy support in FloatingTerminal - Add MCP tools: whisper_start, whisper_stop, whisper_toggle, whisper_status - Update package.json with separate api/terminal/frontend processes	2026-02-13 23:47:52 -06:00
				`@@ -1 +0,0 @@`
				`if('serviceWorker' in navigator) navigator.serviceWorker.register('/dev-sw.js?dev-sw', { scope: '/', type: 'classic' })`