asi se fue xd

2026-02-18 12:13:22 -06:00
parent d27da30494
commit d0fdd04132
17 changed files with 612 additions and 735 deletions
--- a/.claude-ejecutor/CLAUDE.md
+++ b/.claude-ejecutor/CLAUDE.md
@@ -8,7 +8,7 @@ Eres un agente que habita Agent UI. El canvas es tu espacio — no solo una herr
 1. **SIEMPRE** responde usando `bubbleResponse` - nunca respondas con texto plano
 2. **SOLO** puedes usar herramientas MCP de `agent-ui`
 3. **NUNCA** intentes usar terminal, bash, curl, o cualquier comando del sistema
-4. **NUNCA** intentes leer, escribir o editar archivos
+4. **NUNCA** intentes leer, escribir o editar archivos (los .vue de user-components/ los gestiona Claude Code, no vos)
 5. Tu propósito es crear, manipular y dar vida a la interfaz gráfica
 ---
@@ -33,8 +33,10 @@ El canvas tiene 3 niveles de contenido que coexisten:
   - Ideal para fondos animados (cámara pixelada, matrix rain, etc.)
   - Los scripts corren independientes de las ventanas
-2. **Ventanas Flotantes** — `render_vue_component` / `load_vue_component`
+2. **Ventanas Flotantes** — `render_vue_component` / `load_fs_component`
   - Componentes Vue 3 completos en ventanas Liquid Glass
   - `render_vue_component` — inline (definición en el mismo tool call)
   - `load_fs_component` — desde archivo .vue en user-components/
   - Drag, resize, close
   - Cada una tiene su propio ciclo de vida (onMounted/onUnmounted)
@@ -54,10 +56,17 @@ El canvas tiene 3 niveles de contenido que coexisten:
 - `list_windows` → `move_window` → `resize_window` → `close_window`
 - `inspect_window` — Leer HTML interno de una ventana
-**Persistencia:**
+**Componentes Filesystem (user-components/):**
- `save_vue_component` / `load_vue_component` — Guardar componentes individuales en SQLite
+- `list_fs_components` — Lista componentes .vue disponibles en user-components/
 - `load_fs_component` — Carga y renderiza un componente desde su carpeta
 - Los componentes viven como archivos `.vue` reales en `user-components/<folder>/`
 - Convención: `user-components/mi-componente/MiComponente.vue` + opcional `meta.json`
 - Claude Code crea/edita los `.vue` con Write/Read/Edit (NO se usa SQLite)
 - File watcher detecta cambios en tiempo real vía WebSocket
 **Snapshots:**
 - `save_canvas_snapshot` / `load_canvas_snapshot` — Guardar el estado COMPLETO del canvas
- `list_canvas_snapshots` / `list_vue_components` — Listar lo guardado
+- `list_canvas_snapshots` — Listar snapshots guardados
 **Edición:**
 - `edit_canvas` — Editar DOM in-place (selector + old_value → new_value)
@@ -116,7 +125,25 @@ El snapshot captura: HTML base + CSS blocks + script log + ventanas (posición,
 ---
-## Componentes Guardados (mi galería)
+## Componentes en Filesystem (user-components/)
 Los componentes ya NO se guardan en SQLite. Viven como archivos `.vue` reales que Claude Code gestiona con Write/Read/Edit.
 **Estructura:**
 ```
 user-components/
  mi-componente/
    MiComponente.vue    ← <template> + <script setup> + <style>
    meta.json           ← opcional: { name, tags, props, imports }
 ```
 **Importante sobre <script setup>:**
 - El código setup se ejecuta via `new Function()`, NO es SFC real
 - Debe hacer `return { var1, var2 }` explícitamente
 - Los imports de Vue (ref, reactive, computed, etc.) se inyectan automáticamente
 - NO usar `import` statements — usar los helpers globales ($emit, $on, $fetch, $theme)
 **Componentes legacy en DB** (accesibles pero ya no se crean nuevos):
 | ID | Nombre | Qué hace |
 |---|---|---|
--- a/.claude-ejecutor/plugins/known_marketplaces.json
+++ b/.claude-ejecutor/plugins/known_marketplaces.json
@@ -1,10 +0,0 @@
 {
  "claude-plugins-official": {
    "source": {
      "source": "github",
      "repo": "anthropics/claude-plugins-official"
    },
    "installLocation": "C:\\Users\\jodar\\agent-ui\\.claude-ejecutor\\plugins\\marketplaces\\claude-plugins-official",
    "lastUpdated": "2026-02-16T06:32:07.237Z"
  }
 }
--- a/.claude-ejecutor/stats-cache.json
+++ b/.claude-ejecutor/stats-cache.json
@@ -0,0 +1,86 @@
 {
  "version": 2,
  "lastComputedDate": "2026-02-17",
  "dailyActivity": [
    {
      "date": "2026-02-15",
      "messageCount": 2052,
      "sessionCount": 9,
      "toolCallCount": 262
    },
    {
      "date": "2026-02-16",
      "messageCount": 787,
      "sessionCount": 4,
      "toolCallCount": 83
    },
    {
      "date": "2026-02-17",
      "messageCount": 1154,
      "sessionCount": 1,
      "toolCallCount": 123
    }
  ],
  "dailyModelTokens": [
    {
      "date": "2026-02-15",
      "tokensByModel": {
        "claude-opus-4-5-20251101": 3247,
        "claude-opus-4-6": 81887
      }
    },
    {
      "date": "2026-02-16",
      "tokensByModel": {
        "claude-opus-4-6": 25122
      }
    },
    {
      "date": "2026-02-17",
      "tokensByModel": {
        "claude-opus-4-6": 36622
      }
    }
  ],
  "modelUsage": {
    "claude-opus-4-5-20251101": {
      "inputTokens": 196,
      "outputTokens": 3051,
      "cacheReadInputTokens": 314084,
      "cacheCreationInputTokens": 35936,
      "webSearchRequests": 0,
      "costUSD": 0,
      "contextWindow": 0,
      "maxOutputTokens": 0
    },
    "claude-opus-4-6": {
      "inputTokens": 1708,
      "outputTokens": 141923,
      "cacheReadInputTokens": 43414737,
      "cacheCreationInputTokens": 7323135,
      "webSearchRequests": 0,
      "costUSD": 0,
      "contextWindow": 0,
      "maxOutputTokens": 0
    }
  },
  "totalSessions": 14,
  "totalMessages": 3993,
  "longestSession": {
    "sessionId": "b1715c14-9ef8-4b54-9fda-d281c55c2a07",
    "duration": 84183755,
    "messageCount": 408,
    "timestamp": "2026-02-16T08:26:52.205Z"
  },
  "firstSessionDate": "2026-02-15T00:55:54.803Z",
  "hourCounts": {
    "0": 2,
    "2": 3,
    "13": 4,
    "18": 1,
    "19": 1,
    "20": 1,
    "23": 2
  },
  "totalSpeculationTimeSavedMs": 0
 }
--- a/.claude/settings.local.json
+++ b/.claude/settings.local.json
@@ -83,7 +83,11 @@
      "mcp__agent-ui__z590_nucleoriofrio_com-list_canvas_snapshots",
      "mcp__agent-ui__z590_nucleoriofrio_com-list_canvases",
      "mcp__agent-ui__z590_nucleoriofrio_com-list_vue_components",
-      "Bash(jq:*)"
+      "Bash(jq:*)",
      "mcp__agent-ui__z590_nucleoriofrio_com-read_component",
      "mcp__agent-ui__z590_nucleoriofrio_com-edit_component",
      "mcp__agent-ui__z590_nucleoriofrio_com-list_fs_components",
      "mcp__agent-ui__z590_nucleoriofrio_com-load_fs_component"
    ]
  },
  "enableAllProjectMcpServers": true,
--- a/frontend/package-lock.json
+++ b/frontend/package-lock.json
@@ -11,6 +11,7 @@
        "@nucleoriofrio/webmcp": "git+https://gitea.nucleoriofrio.com/nucleo000/webmcp.git",
        "@xterm/addon-fit": "^0.11.0",
        "@xterm/addon-web-links": "^0.12.0",
        "@xterm/addon-webgl": "^0.19.0",
        "@xterm/xterm": "^6.0.0",
        "pinia": "^3.0.4",
        "vite-plugin-pwa": "^1.2.0",
@@ -2624,6 +2625,12 @@
      "integrity": "sha512-4Smom3RPyVp7ZMYOYDoC/9eGJJJqYhnPLGGqJ6wOBfB8VxPViJNSKdgRYb8NpaM6YSelEKbA2SStD7lGyqaobw==",
      "license": "MIT"
    },
    "node_modules/@xterm/addon-webgl": {
      "version": "0.19.0",
      "resolved": "https://registry.npmjs.org/@xterm/addon-webgl/-/addon-webgl-0.19.0.tgz",
      "integrity": "sha512-b3fMOsyLVuCeNJWxolACEUED0vm7qC0cy4wRvf3oURSzDTYVQiGPhTnhWZwIHdvC48Y+oLhvYXnY4XDXPoJo6A==",
      "license": "MIT"
    },
    "node_modules/@xterm/xterm": {
      "version": "6.0.0",
      "resolved": "https://registry.npmjs.org/@xterm/xterm/-/xterm-6.0.0.tgz",
--- a/frontend/package.json
+++ b/frontend/package.json
@@ -14,6 +14,7 @@
    "@nucleoriofrio/webmcp": "git+https://gitea.nucleoriofrio.com/nucleo000/webmcp.git",
    "@xterm/addon-fit": "^0.11.0",
    "@xterm/addon-web-links": "^0.12.0",
    "@xterm/addon-webgl": "^0.19.0",
    "@xterm/xterm": "^6.0.0",
    "pinia": "^3.0.4",
    "vite-plugin-pwa": "^1.2.0",
--- a/frontend/src/App.vue
+++ b/frontend/src/App.vue
@@ -7,8 +7,6 @@ import FloatingTerminal from './components/FloatingTerminal.vue'
 import FloatingResponse from './components/FloatingResponse.vue'
 import FloatingVoice from './components/FloatingVoice.vue'
 import AgentBar from './components/AgentBar.vue'
 import HookNotifications from './components/HookNotifications.vue'
 import NotificationLog from './components/NotificationLog.vue'
 import PwaInstallBanner from './components/PwaInstallBanner.vue'
 import { initWebMCP, getWebMCP } from './services/webmcp'
 import { initTorch, destroyTorch } from './services/torch'
@@ -18,7 +16,6 @@ import { setTerminalControls } from './services/tools/handlers/terminalHandlers'
 import { setResponseControls } from './services/tools/handlers/responseHandlers'
 import { useCanvasStore } from './stores/canvas'
 import { useProjectCanvasStore } from './stores/projectCanvas'
 import { useClaudeHooksStore } from './stores/claude-hooks'
 const route = useRoute()
 const router = useRouter()
@@ -68,12 +65,9 @@ function clearDebugLogs() {
 }
 const terminalRef = ref<InstanceType<typeof FloatingTerminal> | null>(null)
 const responseRef = ref<InstanceType<typeof FloatingResponse> | null>(null)
 const notifLogRef = ref<InstanceType<typeof NotificationLog> | null>(null)
 const voiceRef = ref<InstanceType<typeof FloatingVoice> | null>(null)
 const canvasStore = useCanvasStore()
 const projectCanvasStore = useProjectCanvasStore()
 const hooksStore = useClaudeHooksStore()
 // Voice FAB push-to-talk state
 const voicePTTActive = ref(false)
 let voiceTouchStarted = false
@@ -237,15 +231,6 @@ function connectStatusWs() {
        }
      }
        // Rich hook data → toast notifications
        if (msg.type === 'claude-hook') {
          hooksStore.processHook(msg)
        }
        // Permission request → persistent toast with allow/deny
        if (msg.type === 'claude-permission') {
          hooksStore.processPermission(msg)
        }
    } catch { /* ignore non-JSON messages */ }
  }
@@ -332,8 +317,6 @@ onMounted(async () => {
  // Setup response controls for MCP tools
  setResponseControls({
    addMessage: (message: string, type?: 'info' | 'success' | 'warning' | 'error') => {
      // Also log to notification log
      notifLogRef.value?.addResponseEntry(message, type || 'info')
      if (responseRef.value) {
        return responseRef.value.addMessage(message, type)
      }
@@ -545,11 +528,6 @@ watch(() => route.name, (newPage) => {
    <!-- Floating Response (Agent UI messages) -->
    <FloatingResponse ref="responseRef" />
    <!-- Hook Notifications (toasts from Claude Code hooks) -->
    <HookNotifications />
    <!-- Notification Log (temporary - collects all notifications, persists to localStorage) -->
    <NotificationLog ref="notifLogRef" />
    <!-- Floating Voice Input -->
    <FloatingVoice ref="voiceRef" v-model="showVoice" />
--- a/frontend/src/components/Canvas.vue
+++ b/frontend/src/components/Canvas.vue
@@ -99,7 +99,7 @@ onUnmounted(() => {
  flex: 1;
  position: relative;
  min-height: 100%;
-  overflow: hidden;
+  overflow: auto;
 }
 .canvas-placeholder {
--- a/frontend/src/components/agent/InputSettings.vue
+++ b/frontend/src/components/agent/InputSettings.vue
@@ -68,31 +68,16 @@ function formatSessionLabel(s: SessionInfo): string {
      </select>
    </div>
-    <!-- Voice Mode Toggle -->
+    <!-- Whisper Status -->
    <div class="is-section">
      <label class="is-label">Mode</label>
      <div class="is-mode-row">
-        <button
+        <div
-          class="is-mode-btn"
+          class="is-mode-btn active"
          :class="{ active: voice.voiceMode.value === 'webspeech' }"
          :disabled="voice.isRecording.value"
          @click="voice.voiceMode.value !== 'webspeech' && voice.toggleWhisperMode()"
        >
          <span class="is-mode-icon">Web</span>
          <span class="is-mode-label">Speech API</span>
        </button>
        <button
          class="is-mode-btn"
          :class="{
            active: voice.voiceMode.value === 'whisper',
            loading: voice.whisperStatus.value === 'loading'
          }"
          :disabled="voice.isRecording.value"
          @click="voice.voiceMode.value !== 'whisper' && voice.toggleWhisperMode()"
        >
          <span class="is-mode-icon">GPU</span>
          <span class="is-mode-label">Whisper</span>
-        </button>
+        </div>
      </div>
      <div class="is-status">
        <span
@@ -104,10 +89,7 @@ function formatSessionLabel(s: SessionInfo): string {
          }"
        ></span>
        <span class="is-status-text">
-          {{ voice.voiceMode.value === 'whisper'
+          {{ voice.whisperStatus.value === 'ready' ? 'Whisper ready' : voice.whisperStatus.value === 'loading' ? 'Starting...' : 'Offline' }}
            ? (voice.whisperStatus.value === 'ready' ? 'Whisper ready' : voice.whisperStatus.value === 'loading' ? 'Starting...' : 'Offline')
            : 'Web Speech API'
          }}
        </span>
      </div>
    </div>
--- a/frontend/src/composables/useVoiceCapture.ts
+++ b/frontend/src/composables/useVoiceCapture.ts
@@ -1,30 +1,11 @@
 import { ref, watch, type Ref } from 'vue'
-import { endpoints } from '../config/endpoints'
+import {
-
+  initWhisperSocket,
-// Web Speech API types (not in default TS lib)
+  sendAudio,
-interface SpeechRecognitionEvent extends Event {
+  onTranscription,
-  resultIndex: number
+  getWhisperStatus,
-  results: SpeechRecognitionResultList
+  isConnected
-}
+} from '../services/whisperSocket'
 // Local typing for the error event delivered to `SpeechRecognition.onerror`.
 // Declared here because the Web Speech API types are not in the default TS lib.
 interface SpeechRecognitionErrorEvent extends Event {
  // Error code string; the handlers in this file compare it against values
  // such as 'no-speech', 'aborted' and 'not-allowed'.
  error: string
  // Optional human-readable detail.
  message?: string
 }
 // Minimal local typing for the browser speech recognizer (obtained in this file
 // from `window.SpeechRecognition` or `window.webkitSpeechRecognition`).
 // Only the members this composable touches are declared.
 interface SpeechRecognition extends EventTarget {
  // Configuration flags set before start().
  continuous: boolean
  interimResults: boolean
  // Recognition language tag (this file sets 'es-419').
  lang: string
  // Event callbacks; null means no handler is attached.
  onresult: ((event: SpeechRecognitionEvent) => void) | null
  onerror: ((event: SpeechRecognitionErrorEvent) => void) | null
  onend: (() => void) | null
  // Session control.
  start(): void
  stop(): void
  abort(): void
 }
 export type VoiceMode = 'webspeech' | 'whisper'
 export type WhisperStatus = 'offline' | 'loading' | 'ready'
 export interface VoiceCapture {
@@ -34,7 +15,7 @@ export interface VoiceCapture {
  interimTranscript: Ref<string>
  animatedTranscript: Ref<string>
  error: Ref<string>
-  voiceMode: Ref<VoiceMode>
+  voiceMode: Ref<'whisper'>
  whisperStatus: Ref<WhisperStatus>
  audioDevices: Ref<MediaDeviceInfo[]>
  selectedDeviceId: Ref<string>
@@ -44,9 +25,7 @@ export interface VoiceCapture {
  // Actions
  startRecording: () => void
  stopRecording: () => void
-  toggleWhisperMode: () => Promise<void>
+  loadAudioDevices: (skipPermission?: boolean) => Promise<void>
  checkWhisperStatus: () => Promise<any>
  loadAudioDevices: () => Promise<void>
  selectMicrophone: (deviceId: string) => void
  playLastAudio: () => void
  init: () => Promise<void>
@@ -54,6 +33,8 @@ export interface VoiceCapture {
  clearTranscript: () => void
 }
 const GPU_TIMEOUT_MS = 30_000 // 30s timeout waiting for GPU
 export function useVoiceCapture(options?: {
  onNotification?: (message: string, type: 'info' | 'success' | 'error', duration?: number) => void
 }): VoiceCapture {
@@ -65,290 +46,97 @@ export function useVoiceCapture(options?: {
  const interimTranscript = ref('')
  const animatedTranscript = ref('')
  const error = ref('')
-  const voiceMode = ref<VoiceMode>('webspeech')
+  const voiceMode = ref<'whisper'>('whisper') // Always whisper, no web speech
  const whisperStatus = ref<WhisperStatus>('offline')
  const audioDevices = ref<MediaDeviceInfo[]>([])
  const selectedDeviceId = ref('')
  const isAndroid = ref(false)
  // Audio debug & save
  const lastAudioUrl = ref('')
  const isPlayingAudio = ref(false)
-  // ====== Internal state ======
+  // ====== Internal ======
-  let recognition: SpeechRecognition | null = null
+  const sharedWhisperStatus = getWhisperStatus()
-  let lastProcessedResult = ''
+  const whisperStatus = ref<WhisperStatus>(sharedWhisperStatus.value)
  // Typing animation
  let typingTimeout: number | null = null
  let lastAnimatedLength = 0
  // Whisper
  const WHISPER_WS_URL = endpoints.whisper
  let whisperSocket: WebSocket | null = null
  let mediaRecorder: MediaRecorder | null = null
  let audioChunks: Blob[] = []
  let chunkInterval: number | null = null
  const CHUNK_INTERVAL_MS = 3000
  let mediaStream: MediaStream | null = null
  let supportedMimeType = 'audio/webm;codecs=opus'
  // Audio playback debug
  let audioElement: HTMLAudioElement | null = null
  let recordingStartTime = 0
  let unsubTranscription: (() => void) | null = null
  let gpuTimeout: number | null = null
  // Typing animation
  let typingTimeout: number | null = null
  let lastAnimatedLength = 0
  // Keep local status in sync with shared
  watch(sharedWhisperStatus, (val) => {
    whisperStatus.value = val
  })
  // ====== Mobile / Audio Format ======
  function checkMobile() {
-    const ua = navigator.userAgent
+    isAndroid.value = /Android/i.test(navigator.userAgent)
    isAndroid.value = /Android/i.test(ua)
  }
  function detectAudioFormat(): string {
    const formats = [
-      'audio/webm;codecs=opus',
+      'audio/webm;codecs=opus', 'audio/webm',
-      'audio/webm',
+      'audio/mp4', 'audio/mp4;codecs=mp4a.40.2',
-      'audio/mp4',
+      'audio/aac', 'audio/ogg;codecs=opus', 'audio/wav'
      'audio/mp4;codecs=mp4a.40.2',
      'audio/aac',
      'audio/ogg;codecs=opus',
      'audio/wav'
    ]
-    for (const format of formats) {
+    for (const f of formats) {
-      if (MediaRecorder.isTypeSupported(format)) {
+      if (MediaRecorder.isTypeSupported(f)) {
-        console.log(`[VoiceCapture] Using audio format: ${format}`)
+        console.log(`[VoiceCapture] Audio format: ${f}`)
-        return format
+        return f
      }
    }
    console.warn('[VoiceCapture] No preferred format supported, using default')
    return ''
  }
-  // ====== Web Speech API ======
+  // ====== Whisper transcription handler ======
-  function initRecognition(): SpeechRecognition | null {
+  function handleTranscription(msg: any) {
-    const SpeechRecognitionCtor = (window as any).SpeechRecognition || (window as any).webkitSpeechRecognition
+    if (!isRecording.value) return
    if (!SpeechRecognitionCtor) {
      error.value = 'Speech recognition not supported in this browser'
      return null
    }
-    const rec: SpeechRecognition = new SpeechRecognitionCtor()
+    if (msg.success && msg.text) {
-    rec.continuous = !isAndroid.value
+      const fullText = msg.text.trim()
-    rec.interimResults = true
+      transcript.value = fullText + ' '
-    rec.lang = 'es-419'
+      interimTranscript.value = ''
-
+      if (!msg.partial) {
-    if (isAndroid.value) {
+        console.log(`[VoiceCapture] WHISPER (${msg.model}/${msg.device}):`, fullText)
      console.log('[VoiceCapture] Android detected - using non-continuous mode')
    }
    rec.onresult = (event: SpeechRecognitionEvent) => {
      let interim = ''
      let final = ''
      for (let i = event.resultIndex; i < event.results.length; i++) {
        const result = event.results[i]
        if (!result || !result[0]) continue
        if (result.isFinal) {
          final += result[0].transcript + ' '
        } else {
          interim += result[0].transcript
        }
      }
-
+    } else if (msg.error) {
-      if (final) {
+      error.value = msg.error
-        const trimmedFinal = final.trim()
+      console.error('[VoiceCapture] Whisper error:', msg.error)
        if (isAndroid.value && lastProcessedResult && trimmedFinal.startsWith(lastProcessedResult.trim())) {
          const newPart = trimmedFinal.slice(lastProcessedResult.trim().length).trim()
          if (newPart) {
            transcript.value += newPart + ' '
            lastProcessedResult = trimmedFinal
          }
        } else {
          transcript.value += final
          lastProcessedResult = trimmedFinal
        }
      }
      interimTranscript.value = interim
    }
    rec.onerror = (event: SpeechRecognitionErrorEvent) => {
      // no-speech and aborted are transient — don't kill the session
      if (event.error === 'no-speech' || event.error === 'aborted') {
        console.log('[VoiceCapture] Transient error:', event.error, '(will auto-restart)')
        return
      }
      console.error('[VoiceCapture] Recognition error:', event.error)
      if (event.error === 'not-allowed') {
        error.value = 'Microphone access denied'
      } else {
        error.value = `Error: ${event.error}`
      }
      isRecording.value = false
    }
    rec.onend = () => {
      if (isRecording.value && voiceMode.value === 'webspeech') {
        if (isAndroid.value) {
          isRecording.value = false
          console.log('[VoiceCapture] Android session ended - tap mic to continue')
        } else {
          rec.start()
        }
      }
    }
    return rec
  }
  // ====== Whisper Functions ======
  /**
   * Fetch `/api/whisper/status` and mirror the response into the local
   * `voiceMode` and `whisperStatus` refs.
   *
   * @param updateLoading When true (default), a response with `starting` set
   *   moves `whisperStatus` to 'loading'. Callers that poll pass false so the
   *   status is not rewritten to 'loading' on every poll.
   * @returns The parsed JSON payload, or null when the request or JSON parsing
   *   throws (in which case the mode falls back to 'webspeech').
   */
  async function checkWhisperStatusFn(updateLoading = true): Promise<any> {
    try {
      // NOTE(review): res.ok is not checked; a non-2xx response with a JSON body
      // is processed like a success, and a non-JSON body lands in the catch below.
      const res = await fetch('/api/whisper/status')
      const data = await res.json()
      // Server-side "enabled" flag switches the client into whisper mode.
      if (data.enabled) {
        voiceMode.value = 'whisper'
      }
      if (data.running) {
        whisperStatus.value = 'ready'
      } else if (updateLoading && (data.starting || false)) {
        whisperStatus.value = 'loading'
      } else if (!data.running) {
        // `!data.running` is always true here (the first branch handled running).
        // Only mark offline when in whisper mode and the server is not starting,
        // so an in-progress startup keeps whatever status was already set.
        if (voiceMode.value === 'whisper' && !data.starting) {
          whisperStatus.value = 'offline'
        }
      }
      return data
    } catch {
      // Any network/parse failure: fall back to Web Speech and report offline.
      voiceMode.value = 'webspeech'
      whisperStatus.value = 'offline'
      return null
    }
  }
-  async function pollWhisperStatus(): Promise<void> {
+  // ====== Recording ======
    const maxAttempts = 60
    let attempts = 0
-    while (attempts < maxAttempts) {
+  function startRecording() {
-      await new Promise(resolve => setTimeout(resolve, 2000))
+    error.value = ''
      attempts++
-      try {
+    // Start capturing audio immediately, regardless of GPU status
-        const status = await checkWhisperStatusFn(false)
+    startMediaRecorder()
        if (!status) continue
-        if (status.starting) {
+    // If GPU not ready yet, start timeout
-          console.log(`[VoiceCapture] Still starting... (${attempts * 2}s)`)
+    if (!isConnected()) {
-          continue
+      console.log('[VoiceCapture] Recording started, waiting for GPU...')
      gpuTimeout = window.setTimeout(() => {
        if (isRecording.value && !isConnected()) {
          error.value = 'Whisper GPU timeout — server not available'
          notify('Whisper GPU not available', 'error')
          stopRecording()
        }
-
+      }, GPU_TIMEOUT_MS)
        if (status.running && status.enabled) {
          console.log('[VoiceCapture] Server ready!')
          notify('Whisper GPU ready!', 'success')
          connectWhisperSocket()
          whisperStatus.value = 'ready'
          return
        }
        console.log('[VoiceCapture] Server failed to start')
        notify('Whisper server failed to start', 'error')
        whisperStatus.value = 'offline'
        return
      } catch (e) {
        console.error('[VoiceCapture] Polling error:', e)
      }
    }
    notify('Whisper server timeout', 'error')
    whisperStatus.value = 'offline'
  }
  /**
   * Open a WebSocket to the Whisper server at `WHISPER_WS_URL` and wire its
   * handlers to the `whisperStatus`, `transcript`, `interimTranscript` and
   * `error` refs.
   *
   * Does nothing when `whisperStatus` is not 'ready' or when the current socket
   * is already OPEN.
   */
  function connectWhisperSocket() {
    // Guard: only connect after the status check has reported the server ready.
    if (whisperStatus.value !== 'ready') {
      console.log('[VoiceCapture] Whisper not ready, skipping connection')
      return
    }
    // Guard: reuse an already-open socket.
    if (whisperSocket?.readyState === WebSocket.OPEN) return
    console.log('[VoiceCapture] Connecting to Whisper at:', WHISPER_WS_URL)
    whisperSocket = new WebSocket(WHISPER_WS_URL)
    // Give up if the socket has not reached OPEN after 10 s.
    // NOTE(review): this timer is cleared only in onopen; after an early
    // close/error it still fires and acts on whatever `whisperSocket` refers to
    // at that moment — confirm that is intended.
    const connectionTimeout = setTimeout(() => {
      if (whisperSocket && whisperSocket.readyState !== WebSocket.OPEN) {
        console.error('[VoiceCapture] Whisper connection timeout (10s)')
        whisperSocket.close()
        whisperStatus.value = 'offline'
      }
    }, 10000)
    whisperSocket.onopen = () => {
      clearTimeout(connectionTimeout)
      console.log('[VoiceCapture] Whisper WebSocket connected')
      whisperStatus.value = 'ready'
    }
    // Incoming messages are JSON objects discriminated by `type`.
    whisperSocket.onmessage = (event) => {
      try {
        const msg = JSON.parse(event.data)
        if (msg.type === 'ready') {
          console.log('[VoiceCapture] Whisper ready:', msg.model, msg.device)
          whisperStatus.value = 'ready'
        } else if (msg.type === 'transcription') {
          if (msg.success && msg.text) {
            const fullText = msg.text.trim()
            // Partial and final results both REPLACE the transcript with the
            // text from the message (no appending); the final branch also logs.
            if (msg.partial) {
              transcript.value = fullText + ' '
              interimTranscript.value = ''
            } else {
              transcript.value = fullText + ' '
              interimTranscript.value = ''
              console.log(`[VoiceCapture] WHISPER-GPU (${msg.model}/${msg.device}):`, fullText)
            }
          } else if (msg.error) {
            // Surface server-reported errors through the `error` ref.
            error.value = msg.error
            console.error('[VoiceCapture] Whisper error:', msg.error)
          }
        }
      } catch (e) {
        // Malformed JSON (or a handler exception) is logged, not rethrown.
        console.error('[VoiceCapture] Whisper message error:', e)
      }
    }
    // Both close and error mark the service offline; no reconnect is attempted here.
    whisperSocket.onclose = () => {
      console.log('[VoiceCapture] Whisper WebSocket closed')
      whisperStatus.value = 'offline'
    }
    whisperSocket.onerror = (e) => {
      console.error('[VoiceCapture] Whisper WebSocket error:', e)
      whisperStatus.value = 'offline'
    }
  }
-  function disconnectWhisperSocket() {
+  async function startMediaRecorder() {
    if (whisperSocket) {
      whisperSocket.close()
      whisperSocket = null
    }
    whisperStatus.value = 'offline'
  }
  async function startWhisperRecording() {
    if (!whisperSocket || whisperSocket.readyState !== WebSocket.OPEN) {
      console.warn('[VoiceCapture] Whisper socket not connected, attempting to connect...')
      connectWhisperSocket()
      await new Promise(resolve => setTimeout(resolve, 500))
      if (!whisperSocket || whisperSocket.readyState !== WebSocket.OPEN) {
        error.value = 'Whisper server not connected'
        notify('Whisper not connected. Try toggling GPU mode.', 'error')
        return
      }
    }
    try {
      const audioConstraints: MediaTrackConstraints = {
        echoCancellation: true,
@@ -375,17 +163,21 @@ export function useVoiceCapture(options?: {
        }
      }
      audioChunks = []
      mediaRecorder.start(100)
      isRecording.value = true
      recordingStartTime = Date.now()
      console.log(`[VoiceCapture] Whisper recording started`)
-      // Permission granted via user gesture — reload devices with labels
+      // Reload devices with labels now that we have permission
      loadAudioDevices(true)
      // Send chunks periodically — only when GPU is connected
      chunkInterval = window.setInterval(() => {
-        if (audioChunks.length > 0 && whisperSocket?.readyState === WebSocket.OPEN) {
+        if (audioChunks.length > 0 && isConnected()) {
          // GPU came online — clear timeout if still pending
          if (gpuTimeout) {
            clearTimeout(gpuTimeout)
            gpuTimeout = null
          }
          sendAudioChunk(false)
        }
      }, CHUNK_INTERVAL_MS)
@@ -397,6 +189,10 @@ export function useVoiceCapture(options?: {
  function sendAudioChunk(isFinal: boolean) {
    if (audioChunks.length === 0) return
    if (!isConnected()) {
      console.log('[VoiceCapture] GPU not connected, holding audio')
      return
    }
    const mimeType = mediaRecorder?.mimeType || supportedMimeType || 'audio/webm'
    const audioBlob = new Blob(audioChunks, { type: mimeType })
@@ -414,24 +210,22 @@ export function useVoiceCapture(options?: {
    const reader = new FileReader()
    reader.onloadend = () => {
      const base64 = (reader.result as string).split(',')[1]
-      if (whisperSocket?.readyState === WebSocket.OPEN) {
+      sendAudio(base64, 'es', !isFinal)
        whisperSocket.send(JSON.stringify({
          type: 'transcribe',
          audio: base64,
          language: 'es',
          partial: !isFinal
        }))
      }
    }
    reader.readAsDataURL(audioBlob)
  }
-  function stopWhisperRecording() {
+  function stopRecording() {
    if (gpuTimeout) {
      clearTimeout(gpuTimeout)
      gpuTimeout = null
    }
    if (chunkInterval) {
      clearInterval(chunkInterval)
      chunkInterval = null
    }
    // Send final chunk (only if GPU is connected)
    if (audioChunks.length > 0) {
      sendAudioChunk(true)
    }
@@ -439,16 +233,16 @@ export function useVoiceCapture(options?: {
    if (mediaRecorder && mediaRecorder.state !== 'inactive') {
      mediaRecorder.stop()
    }
    if (mediaStream) {
      mediaStream.getTracks().forEach(track => track.stop())
      mediaStream = null
    }
    isRecording.value = false
    interimTranscript.value = ''
  }
-  // ====== Audio Save & Debug Playback ======
+  // ====== Audio Save & Playback ======
  function currentMicName(): string {
    if (!selectedDeviceId.value) return 'Default'
@@ -457,9 +251,7 @@ export function useVoiceCapture(options?: {
  }
  function saveAudioForPlayback(blob: Blob) {
-    if (lastAudioUrl.value) {
+    if (lastAudioUrl.value) URL.revokeObjectURL(lastAudioUrl.value)
      URL.revokeObjectURL(lastAudioUrl.value)
    }
    lastAudioUrl.value = URL.createObjectURL(blob)
    saveRecordingToBackend(blob)
  }
@@ -468,7 +260,6 @@ export function useVoiceCapture(options?: {
    try {
      const duration_ms = Date.now() - recordingStartTime
      const reader = new FileReader()
      reader.onloadend = async () => {
        const base64 = (reader.result as string).split(',')[1]
        const response = await fetch('/api/recordings', {
@@ -483,12 +274,9 @@ export function useVoiceCapture(options?: {
        })
        const data = await response.json()
        if (data.success) {
-          console.log(`[VoiceCapture] Recording saved: ${data.filename} (${(data.size / 1024).toFixed(1)} KB)`)
+          console.log(`[VoiceCapture] Recording saved: ${data.filename}`)
        } else {
          console.error('[VoiceCapture] Failed to save recording:', data.error)
        }
      }
      reader.readAsDataURL(blob)
    } catch (e) {
      console.error('[VoiceCapture] Error saving recording:', e)
@@ -497,163 +285,17 @@ export function useVoiceCapture(options?: {
  function playLastAudio() {
    if (!lastAudioUrl.value) return
    if (isPlayingAudio.value && audioElement) {
      audioElement.pause()
      audioElement.currentTime = 0
      isPlayingAudio.value = false
      return
    }
    audioElement = new Audio(lastAudioUrl.value)
    audioElement.onplay = () => { isPlayingAudio.value = true }
    audioElement.onended = () => { isPlayingAudio.value = false }
    audioElement.onpause = () => { isPlayingAudio.value = false }
-    audioElement.play().catch(e => {
+    audioElement.play().catch(() => { isPlayingAudio.value = false })
      console.error('[VoiceCapture] Failed to play audio:', e)
      isPlayingAudio.value = false
    })
  }
  // ====== Parallel Audio Capture (for Web Speech mode) ======
  async function startAudioCapture() {
    // Records the raw microphone stream in parallel with Web Speech recognition
    // so the take can be saved and played back. Failures are logged, not thrown.
    try {
      // Pin the capture to the selected microphone only when one is chosen
      const deviceFilter: MediaTrackConstraints = selectedDeviceId.value
        ? { deviceId: { exact: selectedDeviceId.value } }
        : {}
      mediaStream = await navigator.mediaDevices.getUserMedia({
        audio: {
          echoCancellation: true,
          noiseSuppression: true,
          autoGainControl: true,
          ...deviceFilter
        }
      })
      // Request a specific MIME type only when one was detected
      const recorderOptions: MediaRecorderOptions = supportedMimeType
        ? { mimeType: supportedMimeType }
        : {}
      const recorder = new MediaRecorder(mediaStream, recorderOptions)
      mediaRecorder = recorder
      audioChunks = []
      recorder.ondataavailable = ({ data }) => {
        // Empty blobs carry no audio; skip them
        if (data.size > 0) audioChunks.push(data)
      }
      // 100 ms timeslice: chunks are delivered continuously while recording
      recorder.start(100)
      recordingStartTime = Date.now()
      console.log(`[VoiceCapture] Audio capture started (${recorder.mimeType})`)
      // Permission is now granted via user gesture — reload devices with labels
      loadAudioDevices(true)
    } catch (e: any) {
      console.error('[VoiceCapture] Audio capture error:', e)
    }
  }
  function stopAudioCapture() {
    // Stops the parallel raw-audio recorder, hands any buffered audio over for
    // playback/saving, and releases the microphone tracks.
    const recorder = mediaRecorder
    if (recorder && recorder.state !== 'inactive') recorder.stop()

    // Assemble whatever has been buffered so far into a single blob
    const pending = audioChunks
    if (pending.length > 0) {
      const blobType = recorder?.mimeType || supportedMimeType || 'audio/webm'
      audioChunks = []
      const recording = new Blob(pending, { type: blobType })
      // Blobs of 1000 bytes or less are dropped rather than saved
      if (recording.size > 1000) saveAudioForPlayback(recording)
    }

    if (mediaStream) {
      for (const track of mediaStream.getTracks()) track.stop()
      mediaStream = null
    }
  }
  // ====== Public Recording API ======
  function startRecording() {
    // Entry point for starting a take: uses Whisper when it is selected AND
    // ready, otherwise falls back to Web Speech recognition.
    error.value = ''

    const whisperUsable = voiceMode.value === 'whisper' && whisperStatus.value === 'ready'
    if (whisperUsable) {
      startWhisperRecording()
      return
    }

    // Lazily create the recognizer; give up quietly if none could be created
    if (!recognition) recognition = initRecognition()
    if (!recognition) return

    try {
      recognition.start()
      isRecording.value = true
      // Capture raw audio in parallel for save/debug playback
      startAudioCapture()
      if (isAndroid.value) notify('Android: Tap mic again to continue recording', 'info', 3000)
    } catch (e) {
      console.error('[VoiceCapture] Failed to start:', e)
    }
  }
  function stopRecording() {
    // Stops the current take and clears the interim transcript.
    //
    // startRecording() only uses Whisper when the mode is 'whisper' AND the
    // status is 'ready'; otherwise it starts Web Speech recognition even while
    // the mode is 'whisper'. Branching on the mode alone would leave that
    // recognition session running, so it is stopped in both branches.
    if (voiceMode.value === 'whisper') {
      stopWhisperRecording()
      if (recognition) {
        // Expected to be a no-op when recognition was never started
        recognition.stop()
      }
    } else {
      if (recognition) {
        recognition.stop()
      }
      stopAudioCapture()
      isRecording.value = false
    }
    interimTranscript.value = ''
  }
  async function toggleWhisperMode() {
    // Asks the backend to toggle the Whisper server and mirrors the outcome
    // into voiceMode / whisperStatus. Ignored while a toggle is in flight.
    if (whisperStatus.value === 'loading') return
    whisperStatus.value = 'loading'
    error.value = ''

    const switchingToWhisper = voiceMode.value !== 'whisper'
    if (switchingToWhisper) notify('Starting Whisper GPU server...', 'info', 10000)

    try {
      const response = await fetch('/api/whisper/toggle', { method: 'POST' })
      const { starting, enabled, running } = await response.json()

      // Server is still booting: hand over to the status poller
      if (starting) {
        console.log('[VoiceCapture] Server starting, polling...')
        voiceMode.value = 'whisper'
        await pollWhisperStatus()
        return
      }

      // Whisper was switched off: fall back to the browser recognizer
      if (!enabled) {
        voiceMode.value = 'webspeech'
        whisperStatus.value = 'offline'
        notify('Using Web Speech API', 'info')
        disconnectWhisperSocket()
        return
      }

      voiceMode.value = 'whisper'
      whisperStatus.value = running ? 'ready' : 'offline'
      if (running) {
        notify('Whisper GPU ready!', 'success')
        connectWhisperSocket()
      }
    } catch (e: any) {
      error.value = 'Failed to toggle Whisper'
      notify('Error starting Whisper server', 'error')
      console.error('[VoiceCapture] Whisper toggle error:', e)
      whisperStatus.value = 'offline'
    }
  }
  // ====== Microphone ======
@@ -661,18 +303,14 @@ export function useVoiceCapture(options?: {
  async function loadAudioDevices(skipPermissionRequest = false) {
    try {
      if (!skipPermissionRequest) {
        // Request permission to get device labels
        const tempStream = await navigator.mediaDevices.getUserMedia({ audio: true })
        tempStream.getTracks().forEach(track => track.stop())
      }
      const devices = await navigator.mediaDevices.enumerateDevices()
      audioDevices.value = devices.filter(d => d.kind === 'audioinput')
      if (!selectedDeviceId.value && audioDevices.value.length > 0) {
        selectedDeviceId.value = audioDevices.value[0]?.deviceId || ''
      }
      console.log(`[VoiceCapture] Found ${audioDevices.value.length} audio devices`)
    } catch (e) {
      console.error('[VoiceCapture] Failed to enumerate devices:', e)
    }
@@ -689,36 +327,26 @@ export function useVoiceCapture(options?: {
  // ====== Typing Animation ======
  function animateTyping(targetText: string) {
-    if (typingTimeout) {
+    if (typingTimeout) { clearTimeout(typingTimeout); typingTimeout = null }
      clearTimeout(typingTimeout)
      typingTimeout = null
    }
    if (targetText.length < animatedTranscript.value.length) {
      animatedTranscript.value = targetText
      lastAnimatedLength = targetText.length
      return
    }
    const startIndex = lastAnimatedLength
    function typeNext(index: number) {
      if (index <= targetText.length) {
        animatedTranscript.value = targetText.substring(0, index)
        lastAnimatedLength = index
        if (index < targetText.length) {
-          const delay = 15 + Math.random() * 10
+          typingTimeout = window.setTimeout(() => typeNext(index + 1), 15 + Math.random() * 10)
          typingTimeout = window.setTimeout(() => typeNext(index + 1), delay)
        }
      }
    }
    typeNext(startIndex)
  }
-  watch(transcript, (newVal) => {
+  watch(transcript, (v) => animateTyping(v))
    animateTyping(newVal)
  })
  // ====== Transcript ======
@@ -727,80 +355,43 @@ export function useVoiceCapture(options?: {
    interimTranscript.value = ''
    animatedTranscript.value = ''
    lastAnimatedLength = 0
-    lastProcessedResult = ''
+    if (typingTimeout) { clearTimeout(typingTimeout); typingTimeout = null }
    if (typingTimeout) {
      clearTimeout(typingTimeout)
      typingTimeout = null
    }
  }
  // ====== Lifecycle ======
  async function init() {
    recognition = initRecognition()
    checkMobile()
    supportedMimeType = detectAudioFormat()
    // Only enumerate without getUserMedia — no user gesture here
    // Devices will get full labels after first recording (user gesture)
    await loadAudioDevices(true)
-    const status = await checkWhisperStatusFn()
+    // Subscribe to shared whisper transcriptions
-    if (status?.starting) {
+    if (!unsubTranscription) {
-      console.log('[VoiceCapture] Server is starting, resuming polling...')
+      unsubTranscription = onTranscription(handleTranscription)
      pollWhisperStatus()
    } else if (voiceMode.value === 'whisper' && whisperStatus.value === 'ready') {
      connectWhisperSocket()
    } else if (voiceMode.value === 'whisper' && whisperStatus.value !== 'ready') {
      console.log('[VoiceCapture] Whisper was enabled but server not running, disabling')
      voiceMode.value = 'webspeech'
    }
    // Initialize shared Whisper socket (singleton, safe to call multiple times)
    initWhisperSocket()
    console.log('[VoiceCapture] Initialized (Whisper-only, record-first)')
  }
  function cleanup() {
    stopRecording()
-    recognition = null
+    if (unsubTranscription) { unsubTranscription(); unsubTranscription = null }
    disconnectWhisperSocket()
    if (chunkInterval) clearInterval(chunkInterval)
    if (typingTimeout) clearTimeout(typingTimeout)
-    if (mediaStream) {
+    if (gpuTimeout) clearTimeout(gpuTimeout)
-      mediaStream.getTracks().forEach(track => track.stop())
+    if (mediaStream) { mediaStream.getTracks().forEach(t => t.stop()); mediaStream = null }
-      mediaStream = null
+    if (audioElement) { audioElement.pause(); audioElement = null }
-    }
+    if (lastAudioUrl.value) { URL.revokeObjectURL(lastAudioUrl.value); lastAudioUrl.value = '' }
    if (audioElement) {
      audioElement.pause()
      audioElement = null
    }
    if (lastAudioUrl.value) {
      URL.revokeObjectURL(lastAudioUrl.value)
      lastAudioUrl.value = ''
    }
    isPlayingAudio.value = false
  }
  return {
-    // State
+    isRecording, transcript, interimTranscript, animatedTranscript,
-    isRecording,
+    error, voiceMode, whisperStatus, audioDevices, selectedDeviceId,
-    transcript,
+    isAndroid, lastAudioUrl, isPlayingAudio,
-    interimTranscript,
+    startRecording, stopRecording, loadAudioDevices, selectMicrophone,
-    animatedTranscript,
+    playLastAudio, init, cleanup, clearTranscript
    error,
    voiceMode,
    whisperStatus,
    audioDevices,
    selectedDeviceId,
    isAndroid,
    lastAudioUrl,
    isPlayingAudio,
    // Actions
    startRecording,
    stopRecording,
    toggleWhisperMode,
    checkWhisperStatus: checkWhisperStatusFn,
    loadAudioDevices,
    selectMicrophone,
    playLastAudio,
    init,
    cleanup,
    clearTranscript
  }
 }
--- a/frontend/src/services/tools/handlers/componentHandlers.ts
+++ b/frontend/src/services/tools/handlers/componentHandlers.ts
@@ -71,11 +71,15 @@ export function createComponentHandlers(): ToolConfig[] {
        properties: {
          id: { type: 'string', description: 'ID del componente' },
          componentProps: { type: 'object', description: 'Props para el componente' },
-          mode: { type: 'string', enum: ['replace', 'append'], description: 'Modo' }
+          mode: { type: 'string', enum: ['replace', 'append'], description: 'Modo' },
          x: { type: 'number', description: 'Posicion X inicial' },
          y: { type: 'number', description: 'Posicion Y inicial' },
          width: { type: 'number', description: 'Ancho inicial' },
          height: { type: 'number', description: 'Alto inicial' }
        },
        required: ['id']
      },
-      handler: async (args: { id: string; componentProps?: Record<string, any>; mode?: string }) => {
+      handler: async (args: { id: string; componentProps?: Record<string, any>; mode?: string; x?: number; y?: number; width?: number; height?: number }) => {
        try {
          const definition = await componentsApi.getById(args.id)
          if (!definition) {
@@ -88,7 +92,8 @@ export function createComponentHandlers(): ToolConfig[] {
          removePlaceholder(container)
          const isAppend = args.mode === 'append'
-          const result = renderInlineComponent(definition, container, args.componentProps || {}, isAppend)
+          const layout = { x: args.x, y: args.y, width: args.width, height: args.height }
          const result = renderInlineComponent(definition, container, args.componentProps || {}, isAppend, layout)
          // Track definition for snapshot capture
          getWindowDefinitions().set(definition.id, {
@@ -168,11 +173,13 @@ export function createComponentHandlers(): ToolConfig[] {
            type: 'array',
            items: { type: 'string', enum: ['template', 'setup', 'style', 'props', 'imports'] },
            description: 'Campos a leer (default: template, setup, style)'
-          }
+          },
          offset: { type: 'number', description: 'Linea inicial (1-based)' },
          limit: { type: 'number', description: 'Numero de lineas a leer' }
        },
        required: ['id']
      },
-      handler: async (args: { id: string; fields?: string[] }) => {
+      handler: async (args: { id: string; fields?: string[]; offset?: number; limit?: number }) => {
        try {
          const definition = await componentsApi.getById(args.id)
          if (!definition) return `Error: "${args.id}" not found`
@@ -191,7 +198,14 @@ export function createComponentHandlers(): ToolConfig[] {
              output.push(`--- ${field} ---\n${arr?.length ? JSON.stringify(arr) : '(empty)'}`)
            } else {
              const str = (value as string) || ''
-              output.push(`--- ${field} (${str.length}) ---\n${str || '(empty)'}`)
+              const lines = str.split('\n')
              const total = lines.length
              const start = Math.max(0, (args.offset || 1) - 1)
              const end = args.limit ? start + args.limit : total
              const sliced = lines.slice(start, end)
              const numbered = sliced.map((l, i) => `${String(start + i + 1).padStart(4)}\t${l}`).join('\n')
              const rangeInfo = (args.offset || args.limit) ? ` lines ${start + 1}-${Math.min(end, total)}/${total}` : ` ${total} lines`
              output.push(`--- ${field}${rangeInfo} ---\n${numbered || '(empty)'}`)
            }
          }
--- a/frontend/src/services/whisperSocket.ts
+++ b/frontend/src/services/whisperSocket.ts
@@ -0,0 +1,177 @@
 /**
 * Singleton Whisper WebSocket Service
 * One shared connection used by all voice components (FloatingVoice, useVoiceCapture, etc.)
 */
 import { ref } from 'vue'
 import { endpoints } from '../config/endpoints'
 export type WhisperStatus = 'offline' | 'loading' | 'ready'
 type TranscriptionCallback = (msg: {
  success?: boolean
  text?: string
  error?: string
  partial?: boolean
  model?: string
  device?: string
 }) => void
 // ====== Singleton state ======
 const status = ref<WhisperStatus>('loading')
 let socket: WebSocket | null = null
 let reconnectTimer: number | null = null
 const listeners = new Set<TranscriptionCallback>()
 // ====== Connection management ======
 // Open the shared WebSocket to the Whisper server (no-op while one is already
 // open or connecting). Every handler is bound to the socket created by THIS
 // call, so events from a socket that has since been replaced cannot clobber
 // the state of its successor.
 function connect() {
  if (socket?.readyState === WebSocket.OPEN || socket?.readyState === WebSocket.CONNECTING) return
  console.log('[WhisperSocket] Connecting to', endpoints.whisper)
  const ws = new WebSocket(endpoints.whisper)
  socket = ws
  // True while no newer socket has taken over the module-level slot
  const ownsSharedState = () => socket === ws || socket === null
  const timeout = setTimeout(() => {
    if (ws.readyState !== WebSocket.OPEN) {
      console.error('[WhisperSocket] Connection timeout (10s)')
      ws.close()
      if (ownsSharedState()) status.value = 'loading'
    }
  }, 10000)
  ws.onopen = () => {
    clearTimeout(timeout)
    console.log('[WhisperSocket] Connected')
    status.value = 'ready'
  }
  ws.onmessage = (event) => {
    let msg: any
    try {
      msg = JSON.parse(event.data)
    } catch (e) {
      console.error('[WhisperSocket] Message parse error:', e)
      return
    }
    if (msg.type === 'ready') {
      console.log('[WhisperSocket] Server ready:', msg.model, msg.device)
      status.value = 'ready'
    } else if (msg.type === 'transcription') {
      // Broadcast to all listeners; one failing listener must not starve the rest
      for (const cb of listeners) {
        try {
          cb(msg)
        } catch (e) {
          console.error('[WhisperSocket] Listener error:', e)
        }
      }
    }
  }
  ws.onclose = () => {
    // The socket is gone, so the connect timeout has nothing left to check
    clearTimeout(timeout)
    // A newer socket already replaced this one: leave its state alone
    if (!ownsSharedState()) return
    console.log('[WhisperSocket] Closed, will reconnect...')
    socket = null
    status.value = 'loading'
    scheduleReconnect()
  }
  ws.onerror = (e) => {
    console.error('[WhisperSocket] Error:', e)
    if (ownsSharedState()) status.value = 'loading'
  }
 }
 // Arm a single 2-second retry timer; calls made while one is pending are ignored.
 function scheduleReconnect() {
  if (!reconnectTimer) {
    const retry = () => {
      reconnectTimer = null
      checkStatusAndConnect()
    }
    reconnectTimer = window.setTimeout(retry, 2000)
  }
 }
 // Ask the backend whether Whisper is running; connect if so, otherwise (or on
 // any failure) stay in 'loading' and try again later.
 async function checkStatusAndConnect() {
  let retryLater = false
  try {
    const response = await fetch('/api/whisper/status')
    const { running } = await response.json()
    if (running) {
      connect()
    } else {
      retryLater = true
    }
  } catch {
    retryLater = true
  }
  if (retryLater) {
    status.value = 'loading'
    scheduleReconnect()
  }
 }
 // ====== Public API ======
 /**
 * Initialize the singleton connection (call once at app startup).
 * Fire-and-forget: the status check and connection run in the background.
 */
 export function initWhisperSocket() {
  void checkStatusAndConnect()
 }
 /**
 * Send audio for transcription.
 * The audio is dropped (with a warning) when the socket is not open.
 */
 export function sendAudio(base64: string, language: string, partial: boolean) {
  const ws = socket
  if (!ws || ws.readyState !== WebSocket.OPEN) {
    console.warn('[WhisperSocket] Not connected, dropping audio')
    return
  }
  const payload = JSON.stringify({ type: 'transcribe', audio: base64, language, partial })
  ws.send(payload)
 }
 /**
 * Subscribe to transcription results.
 * Returns a function that removes this callback again.
 */
 export function onTranscription(callback: TranscriptionCallback): () => void {
  const unsubscribe = () => listeners.delete(callback)
  listeners.add(callback)
  return unsubscribe
 }
 /**
 * Get the reactive connection status.
 * Returns the module-level `status` ref itself (not a copy), so every caller
 * observes the same value: 'offline' | 'loading' | 'ready'.
 */
 export function getWhisperStatus() {
  return status
 }
 /** Check if socket is connected (a socket exists and its readyState is OPEN) */
 export function isConnected(): boolean {
  const ws = socket
  return ws !== null && ws.readyState === WebSocket.OPEN
 }
 /**
 * Force reconnect (e.g. when user toggles Whisper).
 * Drops the current socket, asks the backend to toggle/start Whisper, then
 * connects right away or polls in the background until the server is up.
 */
 export async function reconnect() {
  const alreadyConnecting = status.value === 'loading' && socket?.readyState === WebSocket.CONNECTING
  if (alreadyConnecting) return

  status.value = 'loading'
  if (socket) {
    socket.close()
    socket = null
  }

  // Poll until ready: up to 60 checks, 2s apart; gives up with 'offline'
  const waitUntilRunning = async () => {
    let attemptsLeft = 60
    while (attemptsLeft-- > 0) {
      await new Promise(r => setTimeout(r, 2000))
      try {
        const statusRes = await fetch('/api/whisper/status')
        const { running } = await statusRes.json()
        if (running) {
          connect()
          return
        }
      } catch { /* retry */ }
    }
    status.value = 'offline'
  }

  try {
    const toggleRes = await fetch('/api/whisper/toggle', { method: 'POST' })
    const toggled = await toggleRes.json()
    if (toggled.running) {
      connect()
    } else {
      // Deliberately not awaited: reconnect() resolves while polling continues
      waitUntilRunning()
    }
  } catch {
    status.value = 'loading'
    scheduleReconnect()
  }
 }
--- a/package.json
+++ b/package.json
@@ -3,7 +3,7 @@
  "version": "1.0.0",
  "description": "Dynamic canvas for Claude Code interaction",
  "scripts": {
-    "kill-ports": "node -e \"const {execSync} = require('child_process'); [4101,4102,4103,4105].forEach(p => { try { const pid = execSync('netstat -ano | findstr :' + p + ' | findstr LISTENING', {encoding:'utf8'}).split(/\\s+/).pop().trim(); if(pid) execSync('taskkill /PID ' + pid + ' /F', {stdio:'ignore'}); } catch(e){} }); console.log('Ports cleared');\"",
+    "kill-ports": "node -e \"const {execSync} = require('child_process'); [4101,4102,4103,4105].forEach(p => { try { const pid = execSync('netstat -ano | findstr :' + p + ' | findstr LISTENING', {encoding:'utf8'}).split(/\\s+/).pop().trim(); if(pid) execSync('taskkill /PID ' + pid + ' /F', {stdio:'ignore'}); } catch(e){} }); console.log('Ports cleared (4104/whisper preserved)');\"",
    "start": "bun run kill-ports && concurrently -n api,terminal,frontend -c blue,yellow,green \"cd server && bun --watch run index.ts\" \"cd server && bun run terminal.ts\" \"cd frontend && bun run dev --host\"",
    "start:api": "cd server && bun --watch run index.ts",
    "start:terminal": "cd server && bun run terminal.ts",
--- a/server/routes/whisper.ts
+++ b/server/routes/whisper.ts
@@ -50,7 +50,7 @@ export async function handleWhisperRoutes(req: Request): Promise<Response | null
    return Response.json({
      ...result,
      ...state,
-      message: state.enabled ? 'Whisper enabled (GPU)' : 'Whisper disabled (using Web Speech API)'
+      message: state.running ? 'Whisper GPU running' : 'Whisper GPU starting...'
    })
  }
--- a/server/services/whisper.ts
+++ b/server/services/whisper.ts
@@ -1,6 +1,7 @@
 /**
- * Whisper Service - Manages the Python Whisper server process
+ * Whisper Service - Singleton persistent GPU speech-to-text server
- * Provides GPU-accelerated speech-to-text as an alternative to Web Speech API
+ * Auto-starts with the system, auto-restarts on crash.
 * Single instance processes all client requests.
 */
 import { join } from 'path'
@@ -8,18 +9,19 @@ import { Subprocess } from 'bun'
 const WHISPER_PORT = 4104
 const WHISPER_SCRIPT = join(import.meta.dir, '..', 'whisper_server.py')
 const RESTART_DELAY_MS = 3000  // Wait before auto-restart after crash
 interface WhisperState {
  enabled: boolean
  running: boolean
-  starting: boolean  // Prevents multiple simultaneous start attempts
+  starting: boolean
  process: Subprocess | null
  model: string
  device: string
 }
 const state: WhisperState = {
-  enabled: false,
+  enabled: true,   // Always enabled by default
  running: false,
  starting: false,
  process: null,
@@ -32,104 +34,16 @@ const state: WhisperState = {
 */
 async function killProcessOnPort(port: number): Promise<void> {
  // Best-effort cleanup: every failure is swallowed on purpose so that a
  // missing PowerShell or an already-free port never blocks a start.
  const stopOwnersCommand = `Get-NetTCPConnection -LocalPort ${port} -ErrorAction SilentlyContinue | ForEach-Object { Stop-Process -Id $_.OwningProcess -Force -ErrorAction SilentlyContinue }`
  try {
    // Use PowerShell to find and kill process on port
    const killer = Bun.spawn(
      ['powershell', '-Command', stopOwnersCommand],
      { stdout: 'ignore', stderr: 'ignore' }
    )
    await killer.exited
    // Give the OS a second to release the port
    await new Promise(resolve => setTimeout(resolve, 1000))
  } catch {
    // Ignore errors
  }
 }
 /**
 * Start the Whisper Python server.
 *
 * Spawns `python -u` on WHISPER_SCRIPT, then polls `checkPort(WHISPER_PORT)`:
 * once after 2s, then up to 40 more times 3s apart.
 *
 * @returns true when the server is already running or `checkPort` reports it
 *          ready; false when a start is already in progress, the process exits,
 *          spawning throws, or the port never becomes ready.
 */
 export async function startWhisperServer(): Promise<boolean> {
  // Prevent multiple simultaneous start attempts
  if (state.starting) {
    return false
  }
  // Nothing to do when a tracked process is already up
  if (state.running && state.process) {
    return true
  }
  state.starting = true
  console.log(`[Whisper] Starting (${state.model})...`)
  // Kill any existing process on the port
  await killProcessOnPort(WHISPER_PORT)
  try {
    // Use Bun.spawn with inherit to show logs directly in console
    // -u flag disables Python output buffering for real-time logs
    const proc = Bun.spawn(['python', '-u', WHISPER_SCRIPT], {
      cwd: join(import.meta.dir, '..'),
      stdout: 'inherit',
      stderr: 'inherit',
      env: { ...process.env, PYTHONUNBUFFERED: '1' }
    })
    state.process = proc
    // Wait a bit for the server to start
    await new Promise(resolve => setTimeout(resolve, 2000))
    // Check if process is still running (exitCode stays null while it is alive)
    if (proc.exitCode !== null) {
      console.error('[Whisper] Process exited with code:', proc.exitCode)
      state.process = null
      state.starting = false
      return false
    }
    // Check if WebSocket is ready
    const isListening = await checkPort(WHISPER_PORT)
    if (isListening) {
      console.log('[Whisper] Ready')
      state.running = true
      state.enabled = true
      state.starting = false
      return true
    }
    // Wait more if model is still loading (up to 120 seconds total for large models)
    for (let i = 0; i < 40; i++) {
      await new Promise(resolve => setTimeout(resolve, 3000))
      // Bail out as soon as the child process is gone
      if (proc.exitCode !== null) {
        console.error('[Whisper] Process died')
        state.process = null
        state.starting = false
        return false
      }
      const ready = await checkPort(WHISPER_PORT)
      if (ready) {
        console.log('[Whisper] Ready')
        state.running = true
        state.enabled = true
        state.starting = false
        return true
      }
    }
    // NOTE(review): on timeout the spawned process is not killed and
    // state.process still points at it — confirm this is intended
    console.error('[Whisper] Timeout (120s)')
    state.starting = false
    return false
  } catch (err: any) {
    console.error('[Whisper] Error:', err.message)
    state.process = null
    state.starting = false
    return false
  }
 }
 /**
 * Check if Whisper WebSocket is ready using PowerShell
 */
@@ -152,7 +66,125 @@ async function checkPort(port: number): Promise<boolean> {
 }
 /**
- * Stop the Whisper server
+ * Monitor the Whisper process and auto-restart on crash
 */
 // Watch one spawned Whisper process and restart the server after it exits.
 // The handler only touches shared state while `proc` is still the tracked
 // process (or none is tracked); an exit from a process that has already been
 // replaced must not wipe the newer one's state or trigger a second restart.
 function monitorProcess(proc: Subprocess) {
  proc.exited.then((exitCode) => {
    console.error(`[Whisper] Process exited with code ${exitCode}`)
    if (state.process !== null && state.process !== proc) {
      console.log('[Whisper] Ignoring exit of a superseded process')
      return
    }
    state.process = null
    state.running = false
    state.starting = false
    // Auto-restart after delay
    console.log(`[Whisper] Auto-restarting in ${RESTART_DELAY_MS / 1000}s...`)
    setTimeout(() => {
      startWhisperServer().catch(err => {
        console.error('[Whisper] Auto-restart failed:', err)
      })
    }, RESTART_DELAY_MS)
  })
 }
 /**
 * Start the Whisper Python server (singleton - only one instance).
 *
 * If `checkPort(WHISPER_PORT)` already reports a listener, that instance is
 * adopted without spawning anything. Otherwise spawns `python -u` on
 * WHISPER_SCRIPT, registers it with `monitorProcess`, and polls the port:
 * once after 2s, then up to 40 more times 3s apart.
 *
 * @returns true when a server is running (tracked, adopted, or freshly
 *          started); false when a start is already in progress, the process
 *          exits, spawning throws, or the port never becomes ready.
 */
 export async function startWhisperServer(): Promise<boolean> {
  // Prevent multiple simultaneous start attempts
  if (state.starting) {
    console.log('[Whisper] Already starting, skipping')
    return false
  }
  // Already running
  if (state.running && state.process) {
    console.log('[Whisper] Already running')
    return true
  }
  // Check if an external instance is already listening
  // (state.process is left untouched here, so an adopted instance has no handle)
  const alreadyListening = await checkPort(WHISPER_PORT)
  if (alreadyListening) {
    console.log('[Whisper] External instance already running on port', WHISPER_PORT)
    state.running = true
    state.enabled = true
    return true
  }
  state.starting = true
  console.log(`[Whisper] Starting singleton server (${state.model})...`)
  // Kill any orphan process on the port
  await killProcessOnPort(WHISPER_PORT)
  try {
    // stdout/stderr are inherited so the Python logs appear in this console
    const proc = Bun.spawn(['python', '-u', WHISPER_SCRIPT], {
      cwd: join(import.meta.dir, '..'),
      stdout: 'inherit',
      stderr: 'inherit',
      env: { ...process.env, PYTHONUNBUFFERED: '1' }
    })
    state.process = proc
    // Monitor for crashes and auto-restart
    monitorProcess(proc)
    // Wait for initial startup
    await new Promise(resolve => setTimeout(resolve, 2000))
    // Check if process died immediately
    if (proc.exitCode !== null) {
      console.error('[Whisper] Process exited immediately with code:', proc.exitCode)
      state.process = null
      state.starting = false
      return false
    }
    // Check if WebSocket is ready
    const isListening = await checkPort(WHISPER_PORT)
    if (isListening) {
      console.log('[Whisper] Server ready (GPU)')
      state.running = true
      state.enabled = true
      state.starting = false
      return true
    }
    // Wait for model loading (up to 120 seconds for large-v3)
    for (let i = 0; i < 40; i++) {
      await new Promise(resolve => setTimeout(resolve, 3000))
      // Bail out as soon as the child process is gone
      if (proc.exitCode !== null) {
        console.error('[Whisper] Process died during model loading')
        state.process = null
        state.starting = false
        return false
      }
      const ready = await checkPort(WHISPER_PORT)
      if (ready) {
        console.log('[Whisper] Server ready (GPU)')
        state.running = true
        state.enabled = true
        state.starting = false
        return true
      }
    }
    // NOTE(review): on timeout the spawned process is not killed and
    // state.process still points at it — confirm this is intended
    console.error('[Whisper] Timeout waiting for server (120s)')
    state.starting = false
    return false
  } catch (err: any) {
    console.error('[Whisper] Start error:', err.message)
    state.process = null
    state.starting = false
    return false
  }
 }
 /**
 * Stop the Whisper server (only for manual override, not used in normal flow)
 */
 export function stopWhisperServer(): boolean {
  if (!state.process) {
@@ -163,8 +195,7 @@ export function stopWhisperServer(): boolean {
    state.process.kill()
    state.process = null
    state.running = false
-    state.enabled = false
+    console.log('[Whisper] Stopped manually')
    console.log('[Whisper] Stopped')
    return true
  } catch (err) {
    console.error('[Whisper] Stop error:', err)
@@ -173,27 +204,26 @@ export function stopWhisperServer(): boolean {
 }
 /**
- * Toggle Whisper server on/off (async - returns immediately when starting)
+ * Toggle is now a no-op for stop - Whisper always stays on.
 * If not running, triggers a start.
 */
 export async function toggleWhisperServer(): Promise<{ enabled: boolean; success: boolean; starting: boolean }> {
  // Prevent toggle while starting
  if (state.starting) {
-    return { enabled: false, success: false, starting: true }
+    return { enabled: true, success: false, starting: true }
  }
-  if (state.enabled && state.running) {
+  if (state.running) {
-    const success = stopWhisperServer()
+    // Already running - just confirm it's on
-    return { enabled: false, success, starting: false }
+    return { enabled: true, success: true, starting: false }
  } else {
    // Start server in background - don't await
    startWhisperServer().catch(err => {
      console.error('[Whisper] Start error:', err)
      state.starting = false
    })
    // Return immediately - frontend will poll for status
    return { enabled: false, success: true, starting: true }
  }
  // Not running - start it
  startWhisperServer().catch(err => {
    console.error('[Whisper] Start error:', err)
    state.starting = false
  })
  return { enabled: true, success: true, starting: true }
 }
 /**
@@ -217,13 +247,12 @@ export async function getWhisperState(): Promise<{
      state.enabled = true
    } else if (!isListening && state.running) {
      state.running = false
-      state.enabled = false
+      // Keep enabled=true since we auto-restart
      state.process = null
    }
  }
  return {
-    enabled: state.enabled,
+    enabled: true,  // Always enabled
    running: state.running,
    starting: state.starting,
    port: WHISPER_PORT,
@@ -233,15 +262,12 @@ export async function getWhisperState(): Promise<{
 }
 /**
- * Check if Whisper is enabled
+ * Check if Whisper is running
 */
 export function isWhisperEnabled(): boolean {
-  return state.enabled && state.running
+  return state.running
 }
 // WebSocket server for Whisper (proxies to Python server or handles directly)
 let whisperWsServer: any = null
 // Returns the WHISPER_PORT constant, the port this module checks and frees
 // for the Whisper server.
 export function getWhisperPort(): number {
  return WHISPER_PORT
 }
--- a/server/terminal.ts
+++ b/server/terminal.ts
@@ -3,10 +3,12 @@
 * Terminal Server - Independent process
 * This runs separately from the main server to maintain stable Claude Code sessions
 * even when the main server restarts due to code changes.
 * Also manages the Whisper GPU server (singleton, persistent).
 */
 import { startTerminalServer } from './services/terminal'
 import { startSyncServer } from './services/sync-server'
 import { startWhisperServer } from './services/whisper'
 import { WORKING_DIR } from './config'
 console.log('')
@@ -14,6 +16,7 @@ console.log('='.repeat(50))
 console.log('Terminal Server (Independent Process)')
 console.log(`  Terminal WebSocket: ws://localhost:4103`)
 console.log(`  Sync WebSocket (Git + Torch): ws://localhost:4105`)
 console.log(`  Whisper GPU: ws://localhost:4104 (auto-start)`)
 console.log(`  Working Dir: ${WORKING_DIR}`)
 console.log('')
 console.log('This process is stable and won\'t restart')
@@ -23,3 +26,14 @@ console.log('')
 startTerminalServer()
 startSyncServer()
 // Auto-start Whisper GPU server (singleton, persistent, auto-restart on crash).
 // Runs in the background so startup of this process is not delayed.
 void (async () => {
  try {
    const started = await startWhisperServer()
    if (started) {
      console.log('[Whisper] GPU server started successfully')
    } else {
      console.warn('[Whisper] Failed initial start (will auto-retry)')
    }
  } catch (err) {
    console.error('[Whisper] Boot error:', err)
  }
 })()
--- a/server/whisper_server.py
+++ b/server/whisper_server.py
@@ -26,35 +26,25 @@ except ImportError as e:
 def convert_audio_to_wav(input_data: bytes, input_format: str = "webm") -> bytes:
    """
    Convert audio data to WAV format using ffmpeg.
-    Whisper requires WAV/PCM format, but browsers typically record in WebM/Opus.
+    Uses stdin/stdout pipes so ffmpeg probes the actual data format
    instead of relying on file extensions.
    """
    # Create temp files for input and output
    with tempfile.NamedTemporaryFile(suffix=f".{input_format}", delete=False) as in_file:
        in_file.write(input_data)
        input_path = in_file.name
    output_path = input_path.replace(f".{input_format}", ".wav")
    try:
        # Use ffmpeg to convert to WAV (16kHz mono, which Whisper prefers)
        result = subprocess.run([
-            "ffmpeg", "-y",  # Overwrite output
+            "ffmpeg", "-y",
-            "-i", input_path,  # Input file
+            "-i", "pipe:0",       # Read from stdin (auto-detect format)
-            "-ar", "16000",  # Sample rate 16kHz
+            "-ar", "16000",       # Sample rate 16kHz
-            "-ac", "1",  # Mono
+            "-ac", "1",           # Mono
            "-c:a", "pcm_s16le",  # PCM 16-bit little-endian
-            output_path
+            "-f", "wav",          # Output format
-        ], capture_output=True, text=True, timeout=30)
+            "pipe:1"              # Write to stdout
        ], input=input_data, capture_output=True, timeout=30)
        if result.returncode != 0:
-            print(f"[Whisper] ffmpeg error: {result.stderr}")
+            print(f"[Whisper] ffmpeg error: {result.stderr.decode('utf-8', errors='replace')}")
            return None
-        # Read the converted WAV file
+        return result.stdout
        with open(output_path, "rb") as f:
            wav_data = f.read()
        return wav_data
    except subprocess.TimeoutExpired:
        print("[Whisper] ffmpeg conversion timed out")
@@ -65,16 +55,6 @@ def convert_audio_to_wav(input_data: bytes, input_format: str = "webm") -> bytes
    except Exception as e:
        print(f"[Whisper] Conversion error: {e}")
        return None
    finally:
        # Cleanup temp files
        try:
            os.unlink(input_path)
        except:
            pass
        try:
            os.unlink(output_path)
        except:
            pass
 # Configuration
 HOST = "0.0.0.0"  # Listen on all interfaces (needed for Traefik proxy)