asi se fue xd

This commit is contained in:
2026-02-18 12:13:22 -06:00
parent d27da30494
commit d0fdd04132
17 changed files with 612 additions and 735 deletions

View File

@@ -8,7 +8,7 @@ Eres un agente que habita Agent UI. El canvas es tu espacio — no solo una herr
1. **SIEMPRE** responde usando `bubbleResponse` - nunca respondas con texto plano 1. **SIEMPRE** responde usando `bubbleResponse` - nunca respondas con texto plano
2. **SOLO** puedes usar herramientas MCP de `agent-ui` 2. **SOLO** puedes usar herramientas MCP de `agent-ui`
3. **NUNCA** intentes usar terminal, bash, curl, o cualquier comando del sistema 3. **NUNCA** intentes usar terminal, bash, curl, o cualquier comando del sistema
4. **NUNCA** intentes leer, escribir o editar archivos 4. **NUNCA** intentes leer, escribir o editar archivos (los .vue de user-components/ los gestiona Claude Code, no vos)
5. Tu propósito es crear, manipular y dar vida a la interfaz gráfica 5. Tu propósito es crear, manipular y dar vida a la interfaz gráfica
--- ---
@@ -33,8 +33,10 @@ El canvas tiene 3 niveles de contenido que coexisten:
- Ideal para fondos animados (cámara pixelada, matrix rain, etc.) - Ideal para fondos animados (cámara pixelada, matrix rain, etc.)
- Los scripts corren independientes de las ventanas - Los scripts corren independientes de las ventanas
2. **Ventanas Flotantes** — `render_vue_component` / `load_vue_component` 2. **Ventanas Flotantes** — `render_vue_component` / `load_fs_component`
- Componentes Vue 3 completos en ventanas Liquid Glass - Componentes Vue 3 completos en ventanas Liquid Glass
- `render_vue_component` — inline (definición en el mismo tool call)
- `load_fs_component` — desde archivo .vue en user-components/
- Drag, resize, close - Drag, resize, close
- Cada una tiene su propio ciclo de vida (onMounted/onUnmounted) - Cada una tiene su propio ciclo de vida (onMounted/onUnmounted)
@@ -54,10 +56,17 @@ El canvas tiene 3 niveles de contenido que coexisten:
- `list_windows`, `move_window`, `resize_window`, `close_window` - `list_windows`, `move_window`, `resize_window`, `close_window`
- `inspect_window` — Leer HTML interno de una ventana - `inspect_window` — Leer HTML interno de una ventana
**Persistencia:** **Componentes Filesystem (user-components/):**
- `save_vue_component` / `load_vue_component` — Guardar componentes individuales en SQLite - `list_fs_components` — Lista componentes .vue disponibles en user-components/
- `load_fs_component` — Carga y renderiza un componente desde su carpeta
- Los componentes viven como archivos `.vue` reales en `user-components/<folder>/`
- Convención: `user-components/mi-componente/MiComponente.vue` + opcional `meta.json`
- Claude Code crea/edita los `.vue` con Write/Read/Edit (NO se usa SQLite)
- File watcher detecta cambios en tiempo real vía WebSocket
**Snapshots:**
- `save_canvas_snapshot` / `load_canvas_snapshot` — Guardar el estado COMPLETO del canvas - `save_canvas_snapshot` / `load_canvas_snapshot` — Guardar el estado COMPLETO del canvas
- `list_canvas_snapshots` / `list_vue_components` — Listar lo guardado - `list_canvas_snapshots` — Listar snapshots guardados
**Edición:** **Edición:**
- `edit_canvas` — Editar DOM in-place (selector + old_value → new_value) - `edit_canvas` — Editar DOM in-place (selector + old_value → new_value)
@@ -116,7 +125,25 @@ El snapshot captura: HTML base + CSS blocks + script log + ventanas (posición,
--- ---
## Componentes Guardados (mi galería) ## Componentes en Filesystem (user-components/)
Los componentes ya NO se guardan en SQLite. Viven como archivos `.vue` reales que Claude Code gestiona con Write/Read/Edit.
**Estructura:**
```
user-components/
mi-componente/
MiComponente.vue ← <template> + <script setup> + <style>
meta.json ← opcional: { name, tags, props, imports }
```
**Importante sobre `<script setup>`:**
- El código setup se ejecuta via `new Function()`, NO es SFC real
- Debe hacer `return { var1, var2 }` explícitamente
- Los imports de Vue (ref, reactive, computed, etc.) se inyectan automáticamente
- NO usar `import` statements — usar los helpers globales ($emit, $on, $fetch, $theme)
**Componentes legacy en DB** (accesibles pero ya no se crean nuevos):
| ID | Nombre | Qué hace | | ID | Nombre | Qué hace |
|---|---|---| |---|---|---|

View File

@@ -1,10 +0,0 @@
{
"claude-plugins-official": {
"source": {
"source": "github",
"repo": "anthropics/claude-plugins-official"
},
"installLocation": "C:\\Users\\jodar\\agent-ui\\.claude-ejecutor\\plugins\\marketplaces\\claude-plugins-official",
"lastUpdated": "2026-02-16T06:32:07.237Z"
}
}

View File

@@ -0,0 +1,86 @@
{
"version": 2,
"lastComputedDate": "2026-02-17",
"dailyActivity": [
{
"date": "2026-02-15",
"messageCount": 2052,
"sessionCount": 9,
"toolCallCount": 262
},
{
"date": "2026-02-16",
"messageCount": 787,
"sessionCount": 4,
"toolCallCount": 83
},
{
"date": "2026-02-17",
"messageCount": 1154,
"sessionCount": 1,
"toolCallCount": 123
}
],
"dailyModelTokens": [
{
"date": "2026-02-15",
"tokensByModel": {
"claude-opus-4-5-20251101": 3247,
"claude-opus-4-6": 81887
}
},
{
"date": "2026-02-16",
"tokensByModel": {
"claude-opus-4-6": 25122
}
},
{
"date": "2026-02-17",
"tokensByModel": {
"claude-opus-4-6": 36622
}
}
],
"modelUsage": {
"claude-opus-4-5-20251101": {
"inputTokens": 196,
"outputTokens": 3051,
"cacheReadInputTokens": 314084,
"cacheCreationInputTokens": 35936,
"webSearchRequests": 0,
"costUSD": 0,
"contextWindow": 0,
"maxOutputTokens": 0
},
"claude-opus-4-6": {
"inputTokens": 1708,
"outputTokens": 141923,
"cacheReadInputTokens": 43414737,
"cacheCreationInputTokens": 7323135,
"webSearchRequests": 0,
"costUSD": 0,
"contextWindow": 0,
"maxOutputTokens": 0
}
},
"totalSessions": 14,
"totalMessages": 3993,
"longestSession": {
"sessionId": "b1715c14-9ef8-4b54-9fda-d281c55c2a07",
"duration": 84183755,
"messageCount": 408,
"timestamp": "2026-02-16T08:26:52.205Z"
},
"firstSessionDate": "2026-02-15T00:55:54.803Z",
"hourCounts": {
"0": 2,
"2": 3,
"13": 4,
"18": 1,
"19": 1,
"20": 1,
"23": 2
},
"totalSpeculationTimeSavedMs": 0
}

View File

@@ -83,7 +83,11 @@
"mcp__agent-ui__z590_nucleoriofrio_com-list_canvas_snapshots", "mcp__agent-ui__z590_nucleoriofrio_com-list_canvas_snapshots",
"mcp__agent-ui__z590_nucleoriofrio_com-list_canvases", "mcp__agent-ui__z590_nucleoriofrio_com-list_canvases",
"mcp__agent-ui__z590_nucleoriofrio_com-list_vue_components", "mcp__agent-ui__z590_nucleoriofrio_com-list_vue_components",
"Bash(jq:*)" "Bash(jq:*)",
"mcp__agent-ui__z590_nucleoriofrio_com-read_component",
"mcp__agent-ui__z590_nucleoriofrio_com-edit_component",
"mcp__agent-ui__z590_nucleoriofrio_com-list_fs_components",
"mcp__agent-ui__z590_nucleoriofrio_com-load_fs_component"
] ]
}, },
"enableAllProjectMcpServers": true, "enableAllProjectMcpServers": true,

View File

@@ -11,6 +11,7 @@
"@nucleoriofrio/webmcp": "git+https://gitea.nucleoriofrio.com/nucleo000/webmcp.git", "@nucleoriofrio/webmcp": "git+https://gitea.nucleoriofrio.com/nucleo000/webmcp.git",
"@xterm/addon-fit": "^0.11.0", "@xterm/addon-fit": "^0.11.0",
"@xterm/addon-web-links": "^0.12.0", "@xterm/addon-web-links": "^0.12.0",
"@xterm/addon-webgl": "^0.19.0",
"@xterm/xterm": "^6.0.0", "@xterm/xterm": "^6.0.0",
"pinia": "^3.0.4", "pinia": "^3.0.4",
"vite-plugin-pwa": "^1.2.0", "vite-plugin-pwa": "^1.2.0",
@@ -2624,6 +2625,12 @@
"integrity": "sha512-4Smom3RPyVp7ZMYOYDoC/9eGJJJqYhnPLGGqJ6wOBfB8VxPViJNSKdgRYb8NpaM6YSelEKbA2SStD7lGyqaobw==", "integrity": "sha512-4Smom3RPyVp7ZMYOYDoC/9eGJJJqYhnPLGGqJ6wOBfB8VxPViJNSKdgRYb8NpaM6YSelEKbA2SStD7lGyqaobw==",
"license": "MIT" "license": "MIT"
}, },
"node_modules/@xterm/addon-webgl": {
"version": "0.19.0",
"resolved": "https://registry.npmjs.org/@xterm/addon-webgl/-/addon-webgl-0.19.0.tgz",
"integrity": "sha512-b3fMOsyLVuCeNJWxolACEUED0vm7qC0cy4wRvf3oURSzDTYVQiGPhTnhWZwIHdvC48Y+oLhvYXnY4XDXPoJo6A==",
"license": "MIT"
},
"node_modules/@xterm/xterm": { "node_modules/@xterm/xterm": {
"version": "6.0.0", "version": "6.0.0",
"resolved": "https://registry.npmjs.org/@xterm/xterm/-/xterm-6.0.0.tgz", "resolved": "https://registry.npmjs.org/@xterm/xterm/-/xterm-6.0.0.tgz",

View File

@@ -14,6 +14,7 @@
"@nucleoriofrio/webmcp": "git+https://gitea.nucleoriofrio.com/nucleo000/webmcp.git", "@nucleoriofrio/webmcp": "git+https://gitea.nucleoriofrio.com/nucleo000/webmcp.git",
"@xterm/addon-fit": "^0.11.0", "@xterm/addon-fit": "^0.11.0",
"@xterm/addon-web-links": "^0.12.0", "@xterm/addon-web-links": "^0.12.0",
"@xterm/addon-webgl": "^0.19.0",
"@xterm/xterm": "^6.0.0", "@xterm/xterm": "^6.0.0",
"pinia": "^3.0.4", "pinia": "^3.0.4",
"vite-plugin-pwa": "^1.2.0", "vite-plugin-pwa": "^1.2.0",

View File

@@ -7,8 +7,6 @@ import FloatingTerminal from './components/FloatingTerminal.vue'
import FloatingResponse from './components/FloatingResponse.vue' import FloatingResponse from './components/FloatingResponse.vue'
import FloatingVoice from './components/FloatingVoice.vue' import FloatingVoice from './components/FloatingVoice.vue'
import AgentBar from './components/AgentBar.vue' import AgentBar from './components/AgentBar.vue'
import HookNotifications from './components/HookNotifications.vue'
import NotificationLog from './components/NotificationLog.vue'
import PwaInstallBanner from './components/PwaInstallBanner.vue' import PwaInstallBanner from './components/PwaInstallBanner.vue'
import { initWebMCP, getWebMCP } from './services/webmcp' import { initWebMCP, getWebMCP } from './services/webmcp'
import { initTorch, destroyTorch } from './services/torch' import { initTorch, destroyTorch } from './services/torch'
@@ -18,7 +16,6 @@ import { setTerminalControls } from './services/tools/handlers/terminalHandlers'
import { setResponseControls } from './services/tools/handlers/responseHandlers' import { setResponseControls } from './services/tools/handlers/responseHandlers'
import { useCanvasStore } from './stores/canvas' import { useCanvasStore } from './stores/canvas'
import { useProjectCanvasStore } from './stores/projectCanvas' import { useProjectCanvasStore } from './stores/projectCanvas'
import { useClaudeHooksStore } from './stores/claude-hooks'
const route = useRoute() const route = useRoute()
const router = useRouter() const router = useRouter()
@@ -68,12 +65,9 @@ function clearDebugLogs() {
} }
const terminalRef = ref<InstanceType<typeof FloatingTerminal> | null>(null) const terminalRef = ref<InstanceType<typeof FloatingTerminal> | null>(null)
const responseRef = ref<InstanceType<typeof FloatingResponse> | null>(null) const responseRef = ref<InstanceType<typeof FloatingResponse> | null>(null)
const notifLogRef = ref<InstanceType<typeof NotificationLog> | null>(null)
const voiceRef = ref<InstanceType<typeof FloatingVoice> | null>(null) const voiceRef = ref<InstanceType<typeof FloatingVoice> | null>(null)
const canvasStore = useCanvasStore() const canvasStore = useCanvasStore()
const projectCanvasStore = useProjectCanvasStore() const projectCanvasStore = useProjectCanvasStore()
const hooksStore = useClaudeHooksStore()
// Voice FAB push-to-talk state // Voice FAB push-to-talk state
const voicePTTActive = ref(false) const voicePTTActive = ref(false)
let voiceTouchStarted = false let voiceTouchStarted = false
@@ -237,15 +231,6 @@ function connectStatusWs() {
} }
} }
// Rich hook data → toast notifications
if (msg.type === 'claude-hook') {
hooksStore.processHook(msg)
}
// Permission request → persistent toast with allow/deny
if (msg.type === 'claude-permission') {
hooksStore.processPermission(msg)
}
} catch { /* ignore non-JSON messages */ } } catch { /* ignore non-JSON messages */ }
} }
@@ -332,8 +317,6 @@ onMounted(async () => {
// Setup response controls for MCP tools // Setup response controls for MCP tools
setResponseControls({ setResponseControls({
addMessage: (message: string, type?: 'info' | 'success' | 'warning' | 'error') => { addMessage: (message: string, type?: 'info' | 'success' | 'warning' | 'error') => {
// Also log to notification log
notifLogRef.value?.addResponseEntry(message, type || 'info')
if (responseRef.value) { if (responseRef.value) {
return responseRef.value.addMessage(message, type) return responseRef.value.addMessage(message, type)
} }
@@ -545,11 +528,6 @@ watch(() => route.name, (newPage) => {
<!-- Floating Response (Agent UI messages) --> <!-- Floating Response (Agent UI messages) -->
<FloatingResponse ref="responseRef" /> <FloatingResponse ref="responseRef" />
<!-- Hook Notifications (toasts from Claude Code hooks) -->
<HookNotifications />
<!-- Notification Log (temporary - collects all notifications, persists to localStorage) -->
<NotificationLog ref="notifLogRef" />
<!-- Floating Voice Input --> <!-- Floating Voice Input -->
<FloatingVoice ref="voiceRef" v-model="showVoice" /> <FloatingVoice ref="voiceRef" v-model="showVoice" />

View File

@@ -99,7 +99,7 @@ onUnmounted(() => {
flex: 1; flex: 1;
position: relative; position: relative;
min-height: 100%; min-height: 100%;
overflow: hidden; overflow: auto;
} }
.canvas-placeholder { .canvas-placeholder {

View File

@@ -68,31 +68,16 @@ function formatSessionLabel(s: SessionInfo): string {
</select> </select>
</div> </div>
<!-- Voice Mode Toggle --> <!-- Whisper Status -->
<div class="is-section"> <div class="is-section">
<label class="is-label">Mode</label> <label class="is-label">Mode</label>
<div class="is-mode-row"> <div class="is-mode-row">
<button <div
class="is-mode-btn" class="is-mode-btn active"
:class="{ active: voice.voiceMode.value === 'webspeech' }"
:disabled="voice.isRecording.value"
@click="voice.voiceMode.value !== 'webspeech' && voice.toggleWhisperMode()"
>
<span class="is-mode-icon">Web</span>
<span class="is-mode-label">Speech API</span>
</button>
<button
class="is-mode-btn"
:class="{
active: voice.voiceMode.value === 'whisper',
loading: voice.whisperStatus.value === 'loading'
}"
:disabled="voice.isRecording.value"
@click="voice.voiceMode.value !== 'whisper' && voice.toggleWhisperMode()"
> >
<span class="is-mode-icon">GPU</span> <span class="is-mode-icon">GPU</span>
<span class="is-mode-label">Whisper</span> <span class="is-mode-label">Whisper</span>
</button> </div>
</div> </div>
<div class="is-status"> <div class="is-status">
<span <span
@@ -104,10 +89,7 @@ function formatSessionLabel(s: SessionInfo): string {
}" }"
></span> ></span>
<span class="is-status-text"> <span class="is-status-text">
{{ voice.voiceMode.value === 'whisper' {{ voice.whisperStatus.value === 'ready' ? 'Whisper ready' : voice.whisperStatus.value === 'loading' ? 'Starting...' : 'Offline' }}
? (voice.whisperStatus.value === 'ready' ? 'Whisper ready' : voice.whisperStatus.value === 'loading' ? 'Starting...' : 'Offline')
: 'Web Speech API'
}}
</span> </span>
</div> </div>
</div> </div>

View File

@@ -1,30 +1,11 @@
import { ref, watch, type Ref } from 'vue' import { ref, watch, type Ref } from 'vue'
import { endpoints } from '../config/endpoints' import {
initWhisperSocket,
// Web Speech API types (not in default TS lib) sendAudio,
interface SpeechRecognitionEvent extends Event { onTranscription,
resultIndex: number getWhisperStatus,
results: SpeechRecognitionResultList isConnected
} } from '../services/whisperSocket'
interface SpeechRecognitionErrorEvent extends Event {
error: string
message?: string
}
interface SpeechRecognition extends EventTarget {
continuous: boolean
interimResults: boolean
lang: string
onresult: ((event: SpeechRecognitionEvent) => void) | null
onerror: ((event: SpeechRecognitionErrorEvent) => void) | null
onend: (() => void) | null
start(): void
stop(): void
abort(): void
}
export type VoiceMode = 'webspeech' | 'whisper'
export type WhisperStatus = 'offline' | 'loading' | 'ready' export type WhisperStatus = 'offline' | 'loading' | 'ready'
export interface VoiceCapture { export interface VoiceCapture {
@@ -34,7 +15,7 @@ export interface VoiceCapture {
interimTranscript: Ref<string> interimTranscript: Ref<string>
animatedTranscript: Ref<string> animatedTranscript: Ref<string>
error: Ref<string> error: Ref<string>
voiceMode: Ref<VoiceMode> voiceMode: Ref<'whisper'>
whisperStatus: Ref<WhisperStatus> whisperStatus: Ref<WhisperStatus>
audioDevices: Ref<MediaDeviceInfo[]> audioDevices: Ref<MediaDeviceInfo[]>
selectedDeviceId: Ref<string> selectedDeviceId: Ref<string>
@@ -44,9 +25,7 @@ export interface VoiceCapture {
// Actions // Actions
startRecording: () => void startRecording: () => void
stopRecording: () => void stopRecording: () => void
toggleWhisperMode: () => Promise<void> loadAudioDevices: (skipPermission?: boolean) => Promise<void>
checkWhisperStatus: () => Promise<any>
loadAudioDevices: () => Promise<void>
selectMicrophone: (deviceId: string) => void selectMicrophone: (deviceId: string) => void
playLastAudio: () => void playLastAudio: () => void
init: () => Promise<void> init: () => Promise<void>
@@ -54,6 +33,8 @@ export interface VoiceCapture {
clearTranscript: () => void clearTranscript: () => void
} }
const GPU_TIMEOUT_MS = 30_000 // 30s timeout waiting for GPU
export function useVoiceCapture(options?: { export function useVoiceCapture(options?: {
onNotification?: (message: string, type: 'info' | 'success' | 'error', duration?: number) => void onNotification?: (message: string, type: 'info' | 'success' | 'error', duration?: number) => void
}): VoiceCapture { }): VoiceCapture {
@@ -65,290 +46,97 @@ export function useVoiceCapture(options?: {
const interimTranscript = ref('') const interimTranscript = ref('')
const animatedTranscript = ref('') const animatedTranscript = ref('')
const error = ref('') const error = ref('')
const voiceMode = ref<VoiceMode>('webspeech') const voiceMode = ref<'whisper'>('whisper') // Always whisper, no web speech
const whisperStatus = ref<WhisperStatus>('offline')
const audioDevices = ref<MediaDeviceInfo[]>([]) const audioDevices = ref<MediaDeviceInfo[]>([])
const selectedDeviceId = ref('') const selectedDeviceId = ref('')
const isAndroid = ref(false) const isAndroid = ref(false)
// Audio debug & save
const lastAudioUrl = ref('') const lastAudioUrl = ref('')
const isPlayingAudio = ref(false) const isPlayingAudio = ref(false)
// ====== Internal state ====== // ====== Internal ======
let recognition: SpeechRecognition | null = null const sharedWhisperStatus = getWhisperStatus()
let lastProcessedResult = '' const whisperStatus = ref<WhisperStatus>(sharedWhisperStatus.value)
// Typing animation
let typingTimeout: number | null = null
let lastAnimatedLength = 0
// Whisper
const WHISPER_WS_URL = endpoints.whisper
let whisperSocket: WebSocket | null = null
let mediaRecorder: MediaRecorder | null = null let mediaRecorder: MediaRecorder | null = null
let audioChunks: Blob[] = [] let audioChunks: Blob[] = []
let chunkInterval: number | null = null let chunkInterval: number | null = null
const CHUNK_INTERVAL_MS = 3000 const CHUNK_INTERVAL_MS = 3000
let mediaStream: MediaStream | null = null let mediaStream: MediaStream | null = null
let supportedMimeType = 'audio/webm;codecs=opus' let supportedMimeType = 'audio/webm;codecs=opus'
// Audio playback debug
let audioElement: HTMLAudioElement | null = null let audioElement: HTMLAudioElement | null = null
let recordingStartTime = 0 let recordingStartTime = 0
let unsubTranscription: (() => void) | null = null
let gpuTimeout: number | null = null
// Typing animation
let typingTimeout: number | null = null
let lastAnimatedLength = 0
// Keep local status in sync with shared
watch(sharedWhisperStatus, (val) => {
whisperStatus.value = val
})
// ====== Mobile / Audio Format ====== // ====== Mobile / Audio Format ======
function checkMobile() { function checkMobile() {
const ua = navigator.userAgent isAndroid.value = /Android/i.test(navigator.userAgent)
isAndroid.value = /Android/i.test(ua)
} }
function detectAudioFormat(): string { function detectAudioFormat(): string {
const formats = [ const formats = [
'audio/webm;codecs=opus', 'audio/webm;codecs=opus', 'audio/webm',
'audio/webm', 'audio/mp4', 'audio/mp4;codecs=mp4a.40.2',
'audio/mp4', 'audio/aac', 'audio/ogg;codecs=opus', 'audio/wav'
'audio/mp4;codecs=mp4a.40.2',
'audio/aac',
'audio/ogg;codecs=opus',
'audio/wav'
] ]
for (const format of formats) { for (const f of formats) {
if (MediaRecorder.isTypeSupported(format)) { if (MediaRecorder.isTypeSupported(f)) {
console.log(`[VoiceCapture] Using audio format: ${format}`) console.log(`[VoiceCapture] Audio format: ${f}`)
return format return f
} }
} }
console.warn('[VoiceCapture] No preferred format supported, using default')
return '' return ''
} }
// ====== Web Speech API ====== // ====== Whisper transcription handler ======
function initRecognition(): SpeechRecognition | null { function handleTranscription(msg: any) {
const SpeechRecognitionCtor = (window as any).SpeechRecognition || (window as any).webkitSpeechRecognition if (!isRecording.value) return
if (!SpeechRecognitionCtor) {
error.value = 'Speech recognition not supported in this browser'
return null
}
const rec: SpeechRecognition = new SpeechRecognitionCtor() if (msg.success && msg.text) {
rec.continuous = !isAndroid.value const fullText = msg.text.trim()
rec.interimResults = true transcript.value = fullText + ' '
rec.lang = 'es-419' interimTranscript.value = ''
if (!msg.partial) {
if (isAndroid.value) { console.log(`[VoiceCapture] WHISPER (${msg.model}/${msg.device}):`, fullText)
console.log('[VoiceCapture] Android detected - using non-continuous mode')
}
rec.onresult = (event: SpeechRecognitionEvent) => {
let interim = ''
let final = ''
for (let i = event.resultIndex; i < event.results.length; i++) {
const result = event.results[i]
if (!result || !result[0]) continue
if (result.isFinal) {
final += result[0].transcript + ' '
} else {
interim += result[0].transcript
}
} }
} else if (msg.error) {
if (final) { error.value = msg.error
const trimmedFinal = final.trim() console.error('[VoiceCapture] Whisper error:', msg.error)
if (isAndroid.value && lastProcessedResult && trimmedFinal.startsWith(lastProcessedResult.trim())) {
const newPart = trimmedFinal.slice(lastProcessedResult.trim().length).trim()
if (newPart) {
transcript.value += newPart + ' '
lastProcessedResult = trimmedFinal
}
} else {
transcript.value += final
lastProcessedResult = trimmedFinal
}
}
interimTranscript.value = interim
}
rec.onerror = (event: SpeechRecognitionErrorEvent) => {
// no-speech and aborted are transient — don't kill the session
if (event.error === 'no-speech' || event.error === 'aborted') {
console.log('[VoiceCapture] Transient error:', event.error, '(will auto-restart)')
return
}
console.error('[VoiceCapture] Recognition error:', event.error)
if (event.error === 'not-allowed') {
error.value = 'Microphone access denied'
} else {
error.value = `Error: ${event.error}`
}
isRecording.value = false
}
rec.onend = () => {
if (isRecording.value && voiceMode.value === 'webspeech') {
if (isAndroid.value) {
isRecording.value = false
console.log('[VoiceCapture] Android session ended - tap mic to continue')
} else {
rec.start()
}
}
}
return rec
}
// ====== Whisper Functions ======
async function checkWhisperStatusFn(updateLoading = true): Promise<any> {
try {
const res = await fetch('/api/whisper/status')
const data = await res.json()
if (data.enabled) {
voiceMode.value = 'whisper'
}
if (data.running) {
whisperStatus.value = 'ready'
} else if (updateLoading && (data.starting || false)) {
whisperStatus.value = 'loading'
} else if (!data.running) {
if (voiceMode.value === 'whisper' && !data.starting) {
whisperStatus.value = 'offline'
}
}
return data
} catch {
voiceMode.value = 'webspeech'
whisperStatus.value = 'offline'
return null
} }
} }
async function pollWhisperStatus(): Promise<void> { // ====== Recording ======
const maxAttempts = 60
let attempts = 0
while (attempts < maxAttempts) { function startRecording() {
await new Promise(resolve => setTimeout(resolve, 2000)) error.value = ''
attempts++
try { // Start capturing audio immediately, regardless of GPU status
const status = await checkWhisperStatusFn(false) startMediaRecorder()
if (!status) continue
if (status.starting) { // If GPU not ready yet, start timeout
console.log(`[VoiceCapture] Still starting... (${attempts * 2}s)`) if (!isConnected()) {
continue console.log('[VoiceCapture] Recording started, waiting for GPU...')
gpuTimeout = window.setTimeout(() => {
if (isRecording.value && !isConnected()) {
error.value = 'Whisper GPU timeout — server not available'
notify('Whisper GPU not available', 'error')
stopRecording()
} }
}, GPU_TIMEOUT_MS)
if (status.running && status.enabled) {
console.log('[VoiceCapture] Server ready!')
notify('Whisper GPU ready!', 'success')
connectWhisperSocket()
whisperStatus.value = 'ready'
return
}
console.log('[VoiceCapture] Server failed to start')
notify('Whisper server failed to start', 'error')
whisperStatus.value = 'offline'
return
} catch (e) {
console.error('[VoiceCapture] Polling error:', e)
}
}
notify('Whisper server timeout', 'error')
whisperStatus.value = 'offline'
}
function connectWhisperSocket() {
if (whisperStatus.value !== 'ready') {
console.log('[VoiceCapture] Whisper not ready, skipping connection')
return
}
if (whisperSocket?.readyState === WebSocket.OPEN) return
console.log('[VoiceCapture] Connecting to Whisper at:', WHISPER_WS_URL)
whisperSocket = new WebSocket(WHISPER_WS_URL)
const connectionTimeout = setTimeout(() => {
if (whisperSocket && whisperSocket.readyState !== WebSocket.OPEN) {
console.error('[VoiceCapture] Whisper connection timeout (10s)')
whisperSocket.close()
whisperStatus.value = 'offline'
}
}, 10000)
whisperSocket.onopen = () => {
clearTimeout(connectionTimeout)
console.log('[VoiceCapture] Whisper WebSocket connected')
whisperStatus.value = 'ready'
}
whisperSocket.onmessage = (event) => {
try {
const msg = JSON.parse(event.data)
if (msg.type === 'ready') {
console.log('[VoiceCapture] Whisper ready:', msg.model, msg.device)
whisperStatus.value = 'ready'
} else if (msg.type === 'transcription') {
if (msg.success && msg.text) {
const fullText = msg.text.trim()
if (msg.partial) {
transcript.value = fullText + ' '
interimTranscript.value = ''
} else {
transcript.value = fullText + ' '
interimTranscript.value = ''
console.log(`[VoiceCapture] WHISPER-GPU (${msg.model}/${msg.device}):`, fullText)
}
} else if (msg.error) {
error.value = msg.error
console.error('[VoiceCapture] Whisper error:', msg.error)
}
}
} catch (e) {
console.error('[VoiceCapture] Whisper message error:', e)
}
}
whisperSocket.onclose = () => {
console.log('[VoiceCapture] Whisper WebSocket closed')
whisperStatus.value = 'offline'
}
whisperSocket.onerror = (e) => {
console.error('[VoiceCapture] Whisper WebSocket error:', e)
whisperStatus.value = 'offline'
} }
} }
function disconnectWhisperSocket() { async function startMediaRecorder() {
if (whisperSocket) {
whisperSocket.close()
whisperSocket = null
}
whisperStatus.value = 'offline'
}
async function startWhisperRecording() {
if (!whisperSocket || whisperSocket.readyState !== WebSocket.OPEN) {
console.warn('[VoiceCapture] Whisper socket not connected, attempting to connect...')
connectWhisperSocket()
await new Promise(resolve => setTimeout(resolve, 500))
if (!whisperSocket || whisperSocket.readyState !== WebSocket.OPEN) {
error.value = 'Whisper server not connected'
notify('Whisper not connected. Try toggling GPU mode.', 'error')
return
}
}
try { try {
const audioConstraints: MediaTrackConstraints = { const audioConstraints: MediaTrackConstraints = {
echoCancellation: true, echoCancellation: true,
@@ -375,17 +163,21 @@ export function useVoiceCapture(options?: {
} }
} }
audioChunks = []
mediaRecorder.start(100) mediaRecorder.start(100)
isRecording.value = true isRecording.value = true
recordingStartTime = Date.now() recordingStartTime = Date.now()
console.log(`[VoiceCapture] Whisper recording started`)
// Permission granted via user gesture — reload devices with labels // Reload devices with labels now that we have permission
loadAudioDevices(true) loadAudioDevices(true)
// Send chunks periodically — only when GPU is connected
chunkInterval = window.setInterval(() => { chunkInterval = window.setInterval(() => {
if (audioChunks.length > 0 && whisperSocket?.readyState === WebSocket.OPEN) { if (audioChunks.length > 0 && isConnected()) {
// GPU came online — clear timeout if still pending
if (gpuTimeout) {
clearTimeout(gpuTimeout)
gpuTimeout = null
}
sendAudioChunk(false) sendAudioChunk(false)
} }
}, CHUNK_INTERVAL_MS) }, CHUNK_INTERVAL_MS)
@@ -397,6 +189,10 @@ export function useVoiceCapture(options?: {
function sendAudioChunk(isFinal: boolean) { function sendAudioChunk(isFinal: boolean) {
if (audioChunks.length === 0) return if (audioChunks.length === 0) return
if (!isConnected()) {
console.log('[VoiceCapture] GPU not connected, holding audio')
return
}
const mimeType = mediaRecorder?.mimeType || supportedMimeType || 'audio/webm' const mimeType = mediaRecorder?.mimeType || supportedMimeType || 'audio/webm'
const audioBlob = new Blob(audioChunks, { type: mimeType }) const audioBlob = new Blob(audioChunks, { type: mimeType })
@@ -414,24 +210,22 @@ export function useVoiceCapture(options?: {
const reader = new FileReader() const reader = new FileReader()
reader.onloadend = () => { reader.onloadend = () => {
const base64 = (reader.result as string).split(',')[1] const base64 = (reader.result as string).split(',')[1]
if (whisperSocket?.readyState === WebSocket.OPEN) { sendAudio(base64, 'es', !isFinal)
whisperSocket.send(JSON.stringify({
type: 'transcribe',
audio: base64,
language: 'es',
partial: !isFinal
}))
}
} }
reader.readAsDataURL(audioBlob) reader.readAsDataURL(audioBlob)
} }
function stopWhisperRecording() { function stopRecording() {
if (gpuTimeout) {
clearTimeout(gpuTimeout)
gpuTimeout = null
}
if (chunkInterval) { if (chunkInterval) {
clearInterval(chunkInterval) clearInterval(chunkInterval)
chunkInterval = null chunkInterval = null
} }
// Send final chunk (only if GPU is connected)
if (audioChunks.length > 0) { if (audioChunks.length > 0) {
sendAudioChunk(true) sendAudioChunk(true)
} }
@@ -439,16 +233,16 @@ export function useVoiceCapture(options?: {
if (mediaRecorder && mediaRecorder.state !== 'inactive') { if (mediaRecorder && mediaRecorder.state !== 'inactive') {
mediaRecorder.stop() mediaRecorder.stop()
} }
if (mediaStream) { if (mediaStream) {
mediaStream.getTracks().forEach(track => track.stop()) mediaStream.getTracks().forEach(track => track.stop())
mediaStream = null mediaStream = null
} }
isRecording.value = false isRecording.value = false
interimTranscript.value = ''
} }
// ====== Audio Save & Debug Playback ====== // ====== Audio Save & Playback ======
function currentMicName(): string { function currentMicName(): string {
if (!selectedDeviceId.value) return 'Default' if (!selectedDeviceId.value) return 'Default'
@@ -457,9 +251,7 @@ export function useVoiceCapture(options?: {
} }
function saveAudioForPlayback(blob: Blob) { function saveAudioForPlayback(blob: Blob) {
if (lastAudioUrl.value) { if (lastAudioUrl.value) URL.revokeObjectURL(lastAudioUrl.value)
URL.revokeObjectURL(lastAudioUrl.value)
}
lastAudioUrl.value = URL.createObjectURL(blob) lastAudioUrl.value = URL.createObjectURL(blob)
saveRecordingToBackend(blob) saveRecordingToBackend(blob)
} }
@@ -468,7 +260,6 @@ export function useVoiceCapture(options?: {
try { try {
const duration_ms = Date.now() - recordingStartTime const duration_ms = Date.now() - recordingStartTime
const reader = new FileReader() const reader = new FileReader()
reader.onloadend = async () => { reader.onloadend = async () => {
const base64 = (reader.result as string).split(',')[1] const base64 = (reader.result as string).split(',')[1]
const response = await fetch('/api/recordings', { const response = await fetch('/api/recordings', {
@@ -483,12 +274,9 @@ export function useVoiceCapture(options?: {
}) })
const data = await response.json() const data = await response.json()
if (data.success) { if (data.success) {
console.log(`[VoiceCapture] Recording saved: ${data.filename} (${(data.size / 1024).toFixed(1)} KB)`) console.log(`[VoiceCapture] Recording saved: ${data.filename}`)
} else {
console.error('[VoiceCapture] Failed to save recording:', data.error)
} }
} }
reader.readAsDataURL(blob) reader.readAsDataURL(blob)
} catch (e) { } catch (e) {
console.error('[VoiceCapture] Error saving recording:', e) console.error('[VoiceCapture] Error saving recording:', e)
@@ -497,163 +285,17 @@ export function useVoiceCapture(options?: {
function playLastAudio() { function playLastAudio() {
if (!lastAudioUrl.value) return if (!lastAudioUrl.value) return
if (isPlayingAudio.value && audioElement) { if (isPlayingAudio.value && audioElement) {
audioElement.pause() audioElement.pause()
audioElement.currentTime = 0 audioElement.currentTime = 0
isPlayingAudio.value = false isPlayingAudio.value = false
return return
} }
audioElement = new Audio(lastAudioUrl.value) audioElement = new Audio(lastAudioUrl.value)
audioElement.onplay = () => { isPlayingAudio.value = true } audioElement.onplay = () => { isPlayingAudio.value = true }
audioElement.onended = () => { isPlayingAudio.value = false } audioElement.onended = () => { isPlayingAudio.value = false }
audioElement.onpause = () => { isPlayingAudio.value = false } audioElement.onpause = () => { isPlayingAudio.value = false }
audioElement.play().catch(e => { audioElement.play().catch(() => { isPlayingAudio.value = false })
console.error('[VoiceCapture] Failed to play audio:', e)
isPlayingAudio.value = false
})
}
// ====== Parallel Audio Capture (for Web Speech mode) ======
/**
 * Start capturing raw microphone audio with a MediaRecorder.
 *
 * Acquires a stream for the selected device (or the default one), records in
 * 100 ms slices into `audioChunks`, and stamps `recordingStartTime`.
 * Failures are logged, never thrown. If the stream was already acquired when
 * the failure happened, it is released so the microphone does not stay live.
 */
async function startAudioCapture() {
  // Stream acquired by THIS call; kept locally so the catch block can release it.
  let acquiredStream: MediaStream | null = null
  try {
    const audioConstraints: MediaTrackConstraints = {
      echoCancellation: true,
      noiseSuppression: true,
      autoGainControl: true,
      ...(selectedDeviceId.value ? { deviceId: { exact: selectedDeviceId.value } } : {})
    }
    acquiredStream = await navigator.mediaDevices.getUserMedia({ audio: audioConstraints })
    mediaStream = acquiredStream

    const recorderOptions: MediaRecorderOptions = {}
    if (supportedMimeType) {
      recorderOptions.mimeType = supportedMimeType
    }
    // May throw (e.g. unsupported mimeType) after the stream is already open.
    mediaRecorder = new MediaRecorder(acquiredStream, recorderOptions)
    audioChunks = []
    mediaRecorder.ondataavailable = (event) => {
      if (event.data.size > 0) {
        audioChunks.push(event.data)
      }
    }
    mediaRecorder.start(100)
    recordingStartTime = Date.now()
    console.log(`[VoiceCapture] Audio capture started (${mediaRecorder.mimeType})`)
    // Permission is now granted via user gesture — reload devices with labels
    loadAudioDevices(true)
  } catch (e: any) {
    console.error('[VoiceCapture] Audio capture error:', e)
    // Release the microphone if we got as far as opening it.
    if (acquiredStream) {
      acquiredStream.getTracks().forEach(track => track.stop())
      if (mediaStream === acquiredStream) {
        mediaStream = null
      }
    }
  }
}
/**
 * Stop the parallel audio capture, hand the recording to
 * `saveAudioForPlayback`, and release the microphone stream.
 */
function stopAudioCapture() {
  const recorder = mediaRecorder
  if (recorder && recorder.state !== 'inactive') {
    recorder.stop()
  }

  // NOTE(review): the blob is assembled synchronously right after stop(); the
  // recorder's last `dataavailable` event may not have fired yet — confirm
  // that losing the final slice is acceptable.
  if (audioChunks.length > 0) {
    const type = recorder?.mimeType || supportedMimeType || 'audio/webm'
    const recording = new Blob(audioChunks, { type })
    audioChunks = []
    // Skip recordings of 1000 bytes or less.
    if (recording.size > 1000) {
      saveAudioForPlayback(recording)
    }
  }

  if (mediaStream) {
    for (const track of mediaStream.getTracks()) {
      track.stop()
    }
    mediaStream = null
  }
}
// ====== Public Recording API ======
/**
 * Begin a recording session.
 *
 * Uses the Whisper path only when Whisper mode is selected AND the server is
 * ready; otherwise falls back to Web Speech recognition with a parallel raw
 * audio capture.
 */
function startRecording() {
  error.value = ''

  const whisperReady = voiceMode.value === 'whisper' && whisperStatus.value === 'ready'
  if (whisperReady) {
    startWhisperRecording()
    return
  }

  // Create the recognizer on first use.
  if (!recognition) {
    recognition = initRecognition()
  }
  if (!recognition) return

  try {
    recognition.start()
    isRecording.value = true
    // Capture raw audio in parallel for save/debug playback (not awaited).
    startAudioCapture()
    if (isAndroid.value) {
      notify('Android: Tap mic again to continue recording', 'info', 3000)
    }
  } catch (e) {
    console.error('[VoiceCapture] Failed to start:', e)
  }
}
/**
 * End the current recording session and clear the interim transcript.
 *
 * startRecording() falls back to Web Speech whenever Whisper mode is selected
 * but the server is not ready, so the recognizer may be running even though
 * `voiceMode` is 'whisper'. It is therefore stopped in both branches; the Web
 * Speech API specifies that stop() on a recognizer that was never started is
 * ignored.
 */
function stopRecording() {
  if (voiceMode.value === 'whisper') {
    // Also stop a Web Speech session started by the not-ready fallback.
    if (recognition) {
      recognition.stop()
    }
    stopWhisperRecording()
  } else {
    if (recognition) {
      recognition.stop()
    }
    stopAudioCapture()
    isRecording.value = false
  }
  interimTranscript.value = ''
}
async function toggleWhisperMode() {
if (whisperStatus.value === 'loading') return
whisperStatus.value = 'loading'
error.value = ''
if (voiceMode.value !== 'whisper') {
notify('Starting Whisper GPU server...', 'info', 10000)
}
try {
const res = await fetch('/api/whisper/toggle', { method: 'POST' })
const data = await res.json()
if (data.starting) {
console.log('[VoiceCapture] Server starting, polling...')
voiceMode.value = 'whisper'
await pollWhisperStatus()
return
}
if (data.enabled) {
voiceMode.value = 'whisper'
whisperStatus.value = data.running ? 'ready' : 'offline'
if (data.running) {
notify('Whisper GPU ready!', 'success')
connectWhisperSocket()
}
} else {
voiceMode.value = 'webspeech'
whisperStatus.value = 'offline'
notify('Using Web Speech API', 'info')
disconnectWhisperSocket()
}
} catch (e: any) {
error.value = 'Failed to toggle Whisper'
notify('Error starting Whisper server', 'error')
console.error('[VoiceCapture] Whisper toggle error:', e)
whisperStatus.value = 'offline'
}
} }
// ====== Microphone ====== // ====== Microphone ======
@@ -661,18 +303,14 @@ export function useVoiceCapture(options?: {
async function loadAudioDevices(skipPermissionRequest = false) { async function loadAudioDevices(skipPermissionRequest = false) {
try { try {
if (!skipPermissionRequest) { if (!skipPermissionRequest) {
// Request permission to get device labels
const tempStream = await navigator.mediaDevices.getUserMedia({ audio: true }) const tempStream = await navigator.mediaDevices.getUserMedia({ audio: true })
tempStream.getTracks().forEach(track => track.stop()) tempStream.getTracks().forEach(track => track.stop())
} }
const devices = await navigator.mediaDevices.enumerateDevices() const devices = await navigator.mediaDevices.enumerateDevices()
audioDevices.value = devices.filter(d => d.kind === 'audioinput') audioDevices.value = devices.filter(d => d.kind === 'audioinput')
if (!selectedDeviceId.value && audioDevices.value.length > 0) { if (!selectedDeviceId.value && audioDevices.value.length > 0) {
selectedDeviceId.value = audioDevices.value[0]?.deviceId || '' selectedDeviceId.value = audioDevices.value[0]?.deviceId || ''
} }
console.log(`[VoiceCapture] Found ${audioDevices.value.length} audio devices`)
} catch (e) { } catch (e) {
console.error('[VoiceCapture] Failed to enumerate devices:', e) console.error('[VoiceCapture] Failed to enumerate devices:', e)
} }
@@ -689,36 +327,26 @@ export function useVoiceCapture(options?: {
// ====== Typing Animation ====== // ====== Typing Animation ======
function animateTyping(targetText: string) { function animateTyping(targetText: string) {
if (typingTimeout) { if (typingTimeout) { clearTimeout(typingTimeout); typingTimeout = null }
clearTimeout(typingTimeout)
typingTimeout = null
}
if (targetText.length < animatedTranscript.value.length) { if (targetText.length < animatedTranscript.value.length) {
animatedTranscript.value = targetText animatedTranscript.value = targetText
lastAnimatedLength = targetText.length lastAnimatedLength = targetText.length
return return
} }
const startIndex = lastAnimatedLength const startIndex = lastAnimatedLength
function typeNext(index: number) { function typeNext(index: number) {
if (index <= targetText.length) { if (index <= targetText.length) {
animatedTranscript.value = targetText.substring(0, index) animatedTranscript.value = targetText.substring(0, index)
lastAnimatedLength = index lastAnimatedLength = index
if (index < targetText.length) { if (index < targetText.length) {
const delay = 15 + Math.random() * 10 typingTimeout = window.setTimeout(() => typeNext(index + 1), 15 + Math.random() * 10)
typingTimeout = window.setTimeout(() => typeNext(index + 1), delay)
} }
} }
} }
typeNext(startIndex) typeNext(startIndex)
} }
watch(transcript, (newVal) => { watch(transcript, (v) => animateTyping(v))
animateTyping(newVal)
})
// ====== Transcript ====== // ====== Transcript ======
@@ -727,80 +355,43 @@ export function useVoiceCapture(options?: {
interimTranscript.value = '' interimTranscript.value = ''
animatedTranscript.value = '' animatedTranscript.value = ''
lastAnimatedLength = 0 lastAnimatedLength = 0
lastProcessedResult = '' if (typingTimeout) { clearTimeout(typingTimeout); typingTimeout = null }
if (typingTimeout) {
clearTimeout(typingTimeout)
typingTimeout = null
}
} }
// ====== Lifecycle ====== // ====== Lifecycle ======
async function init() { async function init() {
recognition = initRecognition()
checkMobile() checkMobile()
supportedMimeType = detectAudioFormat() supportedMimeType = detectAudioFormat()
// Only enumerate without getUserMedia — no user gesture here
// Devices will get full labels after first recording (user gesture)
await loadAudioDevices(true) await loadAudioDevices(true)
const status = await checkWhisperStatusFn() // Subscribe to shared whisper transcriptions
if (status?.starting) { if (!unsubTranscription) {
console.log('[VoiceCapture] Server is starting, resuming polling...') unsubTranscription = onTranscription(handleTranscription)
pollWhisperStatus()
} else if (voiceMode.value === 'whisper' && whisperStatus.value === 'ready') {
connectWhisperSocket()
} else if (voiceMode.value === 'whisper' && whisperStatus.value !== 'ready') {
console.log('[VoiceCapture] Whisper was enabled but server not running, disabling')
voiceMode.value = 'webspeech'
} }
// Initialize shared Whisper socket (singleton, safe to call multiple times)
initWhisperSocket()
console.log('[VoiceCapture] Initialized (Whisper-only, record-first)')
} }
function cleanup() { function cleanup() {
stopRecording() stopRecording()
recognition = null if (unsubTranscription) { unsubTranscription(); unsubTranscription = null }
disconnectWhisperSocket()
if (chunkInterval) clearInterval(chunkInterval) if (chunkInterval) clearInterval(chunkInterval)
if (typingTimeout) clearTimeout(typingTimeout) if (typingTimeout) clearTimeout(typingTimeout)
if (mediaStream) { if (gpuTimeout) clearTimeout(gpuTimeout)
mediaStream.getTracks().forEach(track => track.stop()) if (mediaStream) { mediaStream.getTracks().forEach(t => t.stop()); mediaStream = null }
mediaStream = null if (audioElement) { audioElement.pause(); audioElement = null }
} if (lastAudioUrl.value) { URL.revokeObjectURL(lastAudioUrl.value); lastAudioUrl.value = '' }
if (audioElement) {
audioElement.pause()
audioElement = null
}
if (lastAudioUrl.value) {
URL.revokeObjectURL(lastAudioUrl.value)
lastAudioUrl.value = ''
}
isPlayingAudio.value = false isPlayingAudio.value = false
} }
return { return {
// State isRecording, transcript, interimTranscript, animatedTranscript,
isRecording, error, voiceMode, whisperStatus, audioDevices, selectedDeviceId,
transcript, isAndroid, lastAudioUrl, isPlayingAudio,
interimTranscript, startRecording, stopRecording, loadAudioDevices, selectMicrophone,
animatedTranscript, playLastAudio, init, cleanup, clearTranscript
error,
voiceMode,
whisperStatus,
audioDevices,
selectedDeviceId,
isAndroid,
lastAudioUrl,
isPlayingAudio,
// Actions
startRecording,
stopRecording,
toggleWhisperMode,
checkWhisperStatus: checkWhisperStatusFn,
loadAudioDevices,
selectMicrophone,
playLastAudio,
init,
cleanup,
clearTranscript
} }
} }

View File

@@ -71,11 +71,15 @@ export function createComponentHandlers(): ToolConfig[] {
properties: { properties: {
id: { type: 'string', description: 'ID del componente' }, id: { type: 'string', description: 'ID del componente' },
componentProps: { type: 'object', description: 'Props para el componente' }, componentProps: { type: 'object', description: 'Props para el componente' },
mode: { type: 'string', enum: ['replace', 'append'], description: 'Modo' } mode: { type: 'string', enum: ['replace', 'append'], description: 'Modo' },
x: { type: 'number', description: 'Posicion X inicial' },
y: { type: 'number', description: 'Posicion Y inicial' },
width: { type: 'number', description: 'Ancho inicial' },
height: { type: 'number', description: 'Alto inicial' }
}, },
required: ['id'] required: ['id']
}, },
handler: async (args: { id: string; componentProps?: Record<string, any>; mode?: string }) => { handler: async (args: { id: string; componentProps?: Record<string, any>; mode?: string; x?: number; y?: number; width?: number; height?: number }) => {
try { try {
const definition = await componentsApi.getById(args.id) const definition = await componentsApi.getById(args.id)
if (!definition) { if (!definition) {
@@ -88,7 +92,8 @@ export function createComponentHandlers(): ToolConfig[] {
removePlaceholder(container) removePlaceholder(container)
const isAppend = args.mode === 'append' const isAppend = args.mode === 'append'
const result = renderInlineComponent(definition, container, args.componentProps || {}, isAppend) const layout = { x: args.x, y: args.y, width: args.width, height: args.height }
const result = renderInlineComponent(definition, container, args.componentProps || {}, isAppend, layout)
// Track definition for snapshot capture // Track definition for snapshot capture
getWindowDefinitions().set(definition.id, { getWindowDefinitions().set(definition.id, {
@@ -168,11 +173,13 @@ export function createComponentHandlers(): ToolConfig[] {
type: 'array', type: 'array',
items: { type: 'string', enum: ['template', 'setup', 'style', 'props', 'imports'] }, items: { type: 'string', enum: ['template', 'setup', 'style', 'props', 'imports'] },
description: 'Campos a leer (default: template, setup, style)' description: 'Campos a leer (default: template, setup, style)'
} },
offset: { type: 'number', description: 'Linea inicial (1-based)' },
limit: { type: 'number', description: 'Numero de lineas a leer' }
}, },
required: ['id'] required: ['id']
}, },
handler: async (args: { id: string; fields?: string[] }) => { handler: async (args: { id: string; fields?: string[]; offset?: number; limit?: number }) => {
try { try {
const definition = await componentsApi.getById(args.id) const definition = await componentsApi.getById(args.id)
if (!definition) return `Error: "${args.id}" not found` if (!definition) return `Error: "${args.id}" not found`
@@ -191,7 +198,14 @@ export function createComponentHandlers(): ToolConfig[] {
output.push(`--- ${field} ---\n${arr?.length ? JSON.stringify(arr) : '(empty)'}`) output.push(`--- ${field} ---\n${arr?.length ? JSON.stringify(arr) : '(empty)'}`)
} else { } else {
const str = (value as string) || '' const str = (value as string) || ''
output.push(`--- ${field} (${str.length}) ---\n${str || '(empty)'}`) const lines = str.split('\n')
const total = lines.length
const start = Math.max(0, (args.offset || 1) - 1)
const end = args.limit ? start + args.limit : total
const sliced = lines.slice(start, end)
const numbered = sliced.map((l, i) => `${String(start + i + 1).padStart(4)}\t${l}`).join('\n')
const rangeInfo = (args.offset || args.limit) ? ` lines ${start + 1}-${Math.min(end, total)}/${total}` : ` ${total} lines`
output.push(`--- ${field}${rangeInfo} ---\n${numbered || '(empty)'}`)
} }
} }

View File

@@ -0,0 +1,177 @@
/**
* Singleton Whisper WebSocket Service
* One shared connection used by all voice components (FloatingVoice, useVoiceCapture, etc.)
*/
import { ref } from 'vue'
import { endpoints } from '../config/endpoints'
export type WhisperStatus = 'offline' | 'loading' | 'ready'
// Listener signature for onTranscription(). It is called with the parsed server
// message whenever a message with `type === 'transcription'` arrives.
// NOTE(review): field meanings are defined by the server; presumably `text` is
// the transcript and `partial` marks an interim result — confirm against the
// Whisper server's message format.
type TranscriptionCallback = (msg: {
success?: boolean
text?: string
error?: string
partial?: boolean
model?: string
device?: string
}) => void
// ====== Singleton state ======
// Module-level, so every importer shares the same connection and status.
// Reactive connection status; starts as 'loading' until a socket opens.
const status = ref<WhisperStatus>('loading')
// The one shared WebSocket, or null while disconnected.
let socket: WebSocket | null = null
// Handle of the pending retry scheduled by scheduleReconnect(), or null if none.
let reconnectTimer: number | null = null
// Callbacks registered through onTranscription().
const listeners = new Set<TranscriptionCallback>()
// ====== Connection management ======
/**
 * Open the shared WebSocket unless one is already open or connecting.
 *
 * Every handler closes over the socket created by THIS call (`ws`) and only
 * touches the shared `socket` / `status` while `ws` is still the current
 * connection. A late timeout or close event from a superseded socket can
 * therefore never close, null out, or downgrade its replacement.
 */
function connect() {
  if (socket?.readyState === WebSocket.OPEN || socket?.readyState === WebSocket.CONNECTING) return

  console.log('[WhisperSocket] Connecting to', endpoints.whisper)
  const ws = new WebSocket(endpoints.whisper)
  socket = ws

  // Give up on a connection attempt that has not opened within 10 s.
  const timeout = setTimeout(() => {
    if (ws.readyState !== WebSocket.OPEN) {
      console.error('[WhisperSocket] Connection timeout (10s)')
      ws.close()
      if (socket === ws) status.value = 'loading'
    }
  }, 10000)

  ws.onopen = () => {
    clearTimeout(timeout)
    console.log('[WhisperSocket] Connected')
    status.value = 'ready'
  }

  ws.onmessage = (event) => {
    let msg: any
    try {
      msg = JSON.parse(event.data)
    } catch (e) {
      console.error('[WhisperSocket] Message parse error:', e)
      return
    }

    if (msg?.type === 'ready') {
      console.log('[WhisperSocket] Server ready:', msg.model, msg.device)
      status.value = 'ready'
    } else if (msg?.type === 'transcription') {
      // Broadcast to all listeners; one failing listener must not starve the rest.
      for (const cb of listeners) {
        try {
          cb(msg)
        } catch (e) {
          console.error('[WhisperSocket] Listener error:', e)
        }
      }
    }
  }

  ws.onclose = () => {
    clearTimeout(timeout)
    // Superseded socket (e.g. replaced by reconnect()): leave shared state alone.
    if (socket !== ws) return
    console.log('[WhisperSocket] Closed, will reconnect...')
    socket = null
    status.value = 'loading'
    scheduleReconnect()
  }

  ws.onerror = (e) => {
    console.error('[WhisperSocket] Error:', e)
    if (socket === ws) status.value = 'loading'
  }
}
/**
 * Queue a single status check and connection attempt 2 s from now.
 * Does nothing while a retry is already pending.
 */
function scheduleReconnect() {
  if (!reconnectTimer) {
    const retry = () => {
      // Clear the handle first so the attempt itself may schedule the next retry.
      reconnectTimer = null
      checkStatusAndConnect()
    }
    reconnectTimer = window.setTimeout(retry, 2000)
  }
}
/**
 * Ask the backend whether the Whisper server is running, then connect.
 * If it is not running, or the request fails, mark the status as 'loading'
 * and schedule another attempt.
 */
async function checkStatusAndConnect() {
  const retryLater = () => {
    status.value = 'loading'
    scheduleReconnect()
  }

  try {
    const response = await fetch('/api/whisper/status')
    const { running } = await response.json()
    if (!running) {
      retryLater()
      return
    }
    connect()
  } catch {
    retryLater()
  }
}
// ====== Public API ======
/**
 * Kick off the shared connection (intended to be called at app startup).
 * Fire-and-forget: the status check runs in the background and is not awaited.
 */
export function initWhisperSocket() {
  void checkStatusAndConnect()
}
/**
 * Send one base64-encoded audio payload to the server as a `transcribe` request.
 * If the socket is not open, the audio is dropped with a console warning.
 */
export function sendAudio(base64: string, language: string, partial: boolean) {
  const ws = socket
  if (!ws || ws.readyState !== WebSocket.OPEN) {
    console.warn('[WhisperSocket] Not connected, dropping audio')
    return
  }

  const request = {
    type: 'transcribe',
    audio: base64,
    language,
    partial
  }
  ws.send(JSON.stringify(request))
}
/**
 * Register a listener for transcription messages.
 * @returns a function that removes this listener again.
 */
export function onTranscription(callback: TranscriptionCallback): () => void {
  listeners.add(callback)
  const unsubscribe = () => listeners.delete(callback)
  return unsubscribe
}
/**
 * Get the shared reactive status ref.
 * The ref itself is returned (not a copy), so callers observe later changes.
 */
export function getWhisperStatus() {
return status
}
/** True only while the shared socket exists and is in the OPEN state. */
export function isConnected(): boolean {
  if (!socket) return false
  return socket.readyState === WebSocket.OPEN
}
/**
 * Force reconnect (e.g. when user toggles Whisper).
 *
 * Drops the current socket, asks the backend to toggle/start the server, then
 * connects immediately if it is running. Otherwise it polls the status
 * endpoint every 2 s (up to 60 attempts) in the background and sets the
 * status to 'offline' if the server never comes up. A call made while a
 * connection attempt is already in flight is ignored.
 */
export async function reconnect() {
  if (status.value === 'loading' && socket?.readyState === WebSocket.CONNECTING) return
  status.value = 'loading'

  if (socket) {
    // Detach handlers before closing. The close event is delivered
    // asynchronously; if the old handler ran later it would null out the
    // replacement socket and schedule a competing reconnect.
    const stale = socket
    socket = null
    stale.onopen = null
    stale.onmessage = null
    stale.onerror = null
    stale.onclose = null
    stale.close()
  }

  try {
    const res = await fetch('/api/whisper/toggle', { method: 'POST' })
    const data = await res.json()
    if (data.running) {
      connect()
      return
    }

    // Server is still starting: poll until ready (deliberately not awaited).
    const poll = async () => {
      for (let i = 0; i < 60; i++) {
        await new Promise(r => setTimeout(r, 2000))
        try {
          const s = await fetch('/api/whisper/status')
          const d = await s.json()
          if (d.running) {
            connect()
            return
          }
        } catch { /* retry */ }
      }
      status.value = 'offline'
    }
    poll()
  } catch {
    status.value = 'loading'
    scheduleReconnect()
  }
}

View File

@@ -3,7 +3,7 @@
"version": "1.0.0", "version": "1.0.0",
"description": "Dynamic canvas for Claude Code interaction", "description": "Dynamic canvas for Claude Code interaction",
"scripts": { "scripts": {
"kill-ports": "node -e \"const {execSync} = require('child_process'); [4101,4102,4103,4105].forEach(p => { try { const pid = execSync('netstat -ano | findstr :' + p + ' | findstr LISTENING', {encoding:'utf8'}).split(/\\s+/).pop().trim(); if(pid) execSync('taskkill /PID ' + pid + ' /F', {stdio:'ignore'}); } catch(e){} }); console.log('Ports cleared');\"", "kill-ports": "node -e \"const {execSync} = require('child_process'); [4101,4102,4103,4105].forEach(p => { try { const pid = execSync('netstat -ano | findstr :' + p + ' | findstr LISTENING', {encoding:'utf8'}).split(/\\s+/).pop().trim(); if(pid) execSync('taskkill /PID ' + pid + ' /F', {stdio:'ignore'}); } catch(e){} }); console.log('Ports cleared (4104/whisper preserved)');\"",
"start": "bun run kill-ports && concurrently -n api,terminal,frontend -c blue,yellow,green \"cd server && bun --watch run index.ts\" \"cd server && bun run terminal.ts\" \"cd frontend && bun run dev --host\"", "start": "bun run kill-ports && concurrently -n api,terminal,frontend -c blue,yellow,green \"cd server && bun --watch run index.ts\" \"cd server && bun run terminal.ts\" \"cd frontend && bun run dev --host\"",
"start:api": "cd server && bun --watch run index.ts", "start:api": "cd server && bun --watch run index.ts",
"start:terminal": "cd server && bun run terminal.ts", "start:terminal": "cd server && bun run terminal.ts",

View File

@@ -50,7 +50,7 @@ export async function handleWhisperRoutes(req: Request): Promise<Response | null
return Response.json({ return Response.json({
...result, ...result,
...state, ...state,
message: state.enabled ? 'Whisper enabled (GPU)' : 'Whisper disabled (using Web Speech API)' message: state.running ? 'Whisper GPU running' : 'Whisper GPU starting...'
}) })
} }

View File

@@ -1,6 +1,7 @@
/** /**
* Whisper Service - Manages the Python Whisper server process * Whisper Service - Singleton persistent GPU speech-to-text server
* Provides GPU-accelerated speech-to-text as an alternative to Web Speech API * Auto-starts with the system, auto-restarts on crash.
* Single instance processes all client requests.
*/ */
import { join } from 'path' import { join } from 'path'
@@ -8,18 +9,19 @@ import { Subprocess } from 'bun'
const WHISPER_PORT = 4104 const WHISPER_PORT = 4104
const WHISPER_SCRIPT = join(import.meta.dir, '..', 'whisper_server.py') const WHISPER_SCRIPT = join(import.meta.dir, '..', 'whisper_server.py')
const RESTART_DELAY_MS = 3000 // Wait before auto-restart after crash
interface WhisperState { interface WhisperState {
enabled: boolean enabled: boolean
running: boolean running: boolean
starting: boolean // Prevents multiple simultaneous start attempts starting: boolean
process: Subprocess | null process: Subprocess | null
model: string model: string
device: string device: string
} }
const state: WhisperState = { const state: WhisperState = {
enabled: false, enabled: true, // Always enabled by default
running: false, running: false,
starting: false, starting: false,
process: null, process: null,
@@ -32,104 +34,16 @@ const state: WhisperState = {
*/ */
async function killProcessOnPort(port: number): Promise<void> { async function killProcessOnPort(port: number): Promise<void> {
try { try {
// Use PowerShell to find and kill process on port
const proc = Bun.spawn(['powershell', '-Command', const proc = Bun.spawn(['powershell', '-Command',
`Get-NetTCPConnection -LocalPort ${port} -ErrorAction SilentlyContinue | ForEach-Object { Stop-Process -Id $_.OwningProcess -Force -ErrorAction SilentlyContinue }` `Get-NetTCPConnection -LocalPort ${port} -ErrorAction SilentlyContinue | ForEach-Object { Stop-Process -Id $_.OwningProcess -Force -ErrorAction SilentlyContinue }`
], { stdout: 'ignore', stderr: 'ignore' }) ], { stdout: 'ignore', stderr: 'ignore' })
await proc.exited await proc.exited
// Wait a moment for port to be released
await new Promise(resolve => setTimeout(resolve, 1000)) await new Promise(resolve => setTimeout(resolve, 1000))
} catch { } catch {
// Ignore errors // Ignore errors
} }
} }
/**
 * Launch the Whisper Python server and wait until its port is listening.
 *
 * Resolves to true once the server is reachable (or was already running).
 * Resolves to false if another start is in progress, the process exits, the
 * port never opens within the wait budget, or spawning fails.
 */
export async function startWhisperServer(): Promise<boolean> {
  // Only one start attempt at a time.
  if (state.starting) return false
  if (state.running && state.process) return true

  state.starting = true
  console.log(`[Whisper] Starting (${state.model})...`)

  // Free the port from any previous instance.
  await killProcessOnPort(WHISPER_PORT)

  // Wait schedule: a short initial pause, then 40 polls 3 s apart
  // (about 120 s in total, enough for large models to load).
  const waits: number[] = [2000]
  for (let n = 0; n < 40; n++) waits.push(3000)

  try {
    // `-u` / PYTHONUNBUFFERED disable Python output buffering, so the inherited
    // stdout/stderr show the server's logs in real time.
    const child = Bun.spawn(['python', '-u', WHISPER_SCRIPT], {
      cwd: join(import.meta.dir, '..'),
      stdout: 'inherit',
      stderr: 'inherit',
      env: { ...process.env, PYTHONUNBUFFERED: '1' }
    })
    state.process = child

    for (let attempt = 0; attempt < waits.length; attempt++) {
      await new Promise(resolve => setTimeout(resolve, waits[attempt]))

      // Bail out if the process is no longer alive.
      if (child.exitCode !== null) {
        if (attempt === 0) {
          console.error('[Whisper] Process exited with code:', child.exitCode)
        } else {
          console.error('[Whisper] Process died')
        }
        state.process = null
        state.starting = false
        return false
      }

      const listening = await checkPort(WHISPER_PORT)
      if (listening) {
        console.log('[Whisper] Ready')
        state.running = true
        state.enabled = true
        state.starting = false
        return true
      }
    }

    console.error('[Whisper] Timeout (120s)')
    state.starting = false
    return false
  } catch (err: any) {
    console.error('[Whisper] Error:', err.message)
    state.process = null
    state.starting = false
    return false
  }
}
/** /**
* Check if Whisper WebSocket is ready using PowerShell * Check if Whisper WebSocket is ready using PowerShell
*/ */
@@ -152,7 +66,125 @@ async function checkPort(port: number): Promise<boolean> {
} }
/** /**
* Stop the Whisper server * Monitor the Whisper process and auto-restart on crash
*/
function monitorProcess(proc: Subprocess) {
  // Watches `proc`; once it exits (any exit code), clears the shared state and
  // schedules a fresh start after RESTART_DELAY_MS.
  // NOTE(review): this also fires when stopWhisperServer() kills the process,
  // so a manual stop is followed by an automatic restart — confirm intended.
  const restartServer = () => {
    startWhisperServer().catch(err => {
      console.error('[Whisper] Auto-restart failed:', err)
    })
  }

  proc.exited.then((exitCode) => {
    console.error(`[Whisper] Process exited with code ${exitCode}`)

    state.running = false
    state.starting = false
    state.process = null

    // Auto-restart after delay
    console.log(`[Whisper] Auto-restarting in ${RESTART_DELAY_MS / 1000}s...`)
    setTimeout(restartServer, RESTART_DELAY_MS)
  })
}
/**
 * Start the Whisper Python server (singleton - only one instance).
 *
 * Resolves to true when a server is reachable on WHISPER_PORT: already
 * tracked, started externally, or freshly spawned here. Resolves to false
 * when another start is in progress, the spawned process exits, the port does
 * not open within about 120 s, or spawning fails.
 *
 * `state.starting` is claimed before the first await so two overlapping
 * callers cannot both spawn a process. The failure paths reset shared state
 * only while the process spawned by THIS call is still the tracked one;
 * otherwise the exit monitor has already reset it, or a restart attempt now
 * owns it.
 */
export async function startWhisperServer(): Promise<boolean> {
  // Prevent multiple simultaneous start attempts
  if (state.starting) {
    console.log('[Whisper] Already starting, skipping')
    return false
  }

  // Already running
  if (state.running && state.process) {
    console.log('[Whisper] Already running')
    return true
  }

  // Claim the start slot before awaiting anything.
  state.starting = true

  // Check if an external instance is already listening
  let alreadyListening = false
  try {
    alreadyListening = await checkPort(WHISPER_PORT)
  } catch (err) {
    // Do not leave the start slot claimed if the probe itself fails.
    state.starting = false
    throw err
  }
  if (alreadyListening) {
    console.log('[Whisper] External instance already running on port', WHISPER_PORT)
    state.running = true
    state.enabled = true
    state.starting = false
    return true
  }

  console.log(`[Whisper] Starting singleton server (${state.model})...`)

  // Kill any orphan process on the port
  await killProcessOnPort(WHISPER_PORT)

  const sleep = (ms: number) => new Promise(resolve => setTimeout(resolve, ms))

  try {
    const proc = Bun.spawn(['python', '-u', WHISPER_SCRIPT], {
      cwd: join(import.meta.dir, '..'),
      stdout: 'inherit',
      stderr: 'inherit',
      env: { ...process.env, PYTHONUNBUFFERED: '1' }
    })
    state.process = proc

    // Monitor for crashes and auto-restart
    monitorProcess(proc)

    // Record success for this attempt.
    const markReady = (): boolean => {
      console.log('[Whisper] Server ready (GPU)')
      state.running = true
      state.enabled = true
      state.starting = false
      return true
    }

    // Record that `proc` died, without clobbering a newer attempt's state.
    const markDead = (): boolean => {
      if (state.process === proc) {
        state.process = null
        state.starting = false
      }
      return false
    }

    // Wait for initial startup
    await sleep(2000)

    // Check if process died immediately
    if (proc.exitCode !== null) {
      console.error('[Whisper] Process exited immediately with code:', proc.exitCode)
      return markDead()
    }

    // Check if WebSocket is ready
    if (await checkPort(WHISPER_PORT)) {
      return markReady()
    }

    // Wait for model loading (up to 120 seconds for large-v3)
    for (let i = 0; i < 40; i++) {
      await sleep(3000)

      if (proc.exitCode !== null) {
        console.error('[Whisper] Process died during model loading')
        return markDead()
      }

      if (await checkPort(WHISPER_PORT)) {
        return markReady()
      }
    }

    console.error('[Whisper] Timeout waiting for server (120s)')
    state.starting = false
    return false
  } catch (err: any) {
    console.error('[Whisper] Start error:', err.message)
    state.process = null
    state.starting = false
    return false
  }
}
/**
* Stop the Whisper server (only for manual override, not used in normal flow)
*/ */
export function stopWhisperServer(): boolean { export function stopWhisperServer(): boolean {
if (!state.process) { if (!state.process) {
@@ -163,8 +195,7 @@ export function stopWhisperServer(): boolean {
state.process.kill() state.process.kill()
state.process = null state.process = null
state.running = false state.running = false
state.enabled = false console.log('[Whisper] Stopped manually')
console.log('[Whisper] Stopped')
return true return true
} catch (err) { } catch (err) {
console.error('[Whisper] Stop error:', err) console.error('[Whisper] Stop error:', err)
@@ -173,27 +204,26 @@ export function stopWhisperServer(): boolean {
} }
/** /**
* Toggle Whisper server on/off (async - returns immediately when starting) * Toggle is now a no-op for stop - Whisper always stays on.
* If not running, triggers a start.
*/ */
export async function toggleWhisperServer(): Promise<{ enabled: boolean; success: boolean; starting: boolean }> { export async function toggleWhisperServer(): Promise<{ enabled: boolean; success: boolean; starting: boolean }> {
// Prevent toggle while starting
if (state.starting) { if (state.starting) {
return { enabled: false, success: false, starting: true } return { enabled: true, success: false, starting: true }
} }
if (state.enabled && state.running) { if (state.running) {
const success = stopWhisperServer() // Already running - just confirm it's on
return { enabled: false, success, starting: false } return { enabled: true, success: true, starting: false }
} else {
// Start server in background - don't await
startWhisperServer().catch(err => {
console.error('[Whisper] Start error:', err)
state.starting = false
})
// Return immediately - frontend will poll for status
return { enabled: false, success: true, starting: true }
} }
// Not running - start it
startWhisperServer().catch(err => {
console.error('[Whisper] Start error:', err)
state.starting = false
})
return { enabled: true, success: true, starting: true }
} }
/** /**
@@ -217,13 +247,12 @@ export async function getWhisperState(): Promise<{
state.enabled = true state.enabled = true
} else if (!isListening && state.running) { } else if (!isListening && state.running) {
state.running = false state.running = false
state.enabled = false // Keep enabled=true since we auto-restart
state.process = null
} }
} }
return { return {
enabled: state.enabled, enabled: true, // Always enabled
running: state.running, running: state.running,
starting: state.starting, starting: state.starting,
port: WHISPER_PORT, port: WHISPER_PORT,
@@ -233,15 +262,12 @@ export async function getWhisperState(): Promise<{
} }
/** /**
* Check if Whisper is enabled * Check if Whisper is running
*/ */
export function isWhisperEnabled(): boolean { export function isWhisperEnabled(): boolean {
return state.enabled && state.running return state.running
} }
// WebSocket server for Whisper (proxies to Python server or handles directly)
let whisperWsServer: any = null
export function getWhisperPort(): number { export function getWhisperPort(): number {
return WHISPER_PORT return WHISPER_PORT
} }

View File

@@ -3,10 +3,12 @@
* Terminal Server - Independent process * Terminal Server - Independent process
* This runs separately from the main server to maintain stable Claude Code sessions * This runs separately from the main server to maintain stable Claude Code sessions
* even when the main server restarts due to code changes. * even when the main server restarts due to code changes.
* Also manages the Whisper GPU server (singleton, persistent).
*/ */
import { startTerminalServer } from './services/terminal' import { startTerminalServer } from './services/terminal'
import { startSyncServer } from './services/sync-server' import { startSyncServer } from './services/sync-server'
import { startWhisperServer } from './services/whisper'
import { WORKING_DIR } from './config' import { WORKING_DIR } from './config'
console.log('') console.log('')
@@ -14,6 +16,7 @@ console.log('='.repeat(50))
console.log('Terminal Server (Independent Process)') console.log('Terminal Server (Independent Process)')
console.log(` Terminal WebSocket: ws://localhost:4103`) console.log(` Terminal WebSocket: ws://localhost:4103`)
console.log(` Sync WebSocket (Git + Torch): ws://localhost:4105`) console.log(` Sync WebSocket (Git + Torch): ws://localhost:4105`)
console.log(` Whisper GPU: ws://localhost:4104 (auto-start)`)
console.log(` Working Dir: ${WORKING_DIR}`) console.log(` Working Dir: ${WORKING_DIR}`)
console.log('') console.log('')
console.log('This process is stable and won\'t restart') console.log('This process is stable and won\'t restart')
@@ -23,3 +26,14 @@ console.log('')
startTerminalServer() startTerminalServer()
startSyncServer() startSyncServer()
// Auto-start Whisper GPU server (singleton, persistent, auto-restart on crash)
startWhisperServer().then(ok => {
if (ok) {
console.log('[Whisper] GPU server started successfully')
} else {
console.warn('[Whisper] Failed initial start (will auto-retry)')
}
}).catch(err => {
console.error('[Whisper] Boot error:', err)
})

View File

@@ -26,35 +26,25 @@ except ImportError as e:
def convert_audio_to_wav(input_data: bytes, input_format: str = "webm") -> bytes: def convert_audio_to_wav(input_data: bytes, input_format: str = "webm") -> bytes:
""" """
Convert audio data to WAV format using ffmpeg. Convert audio data to WAV format using ffmpeg.
Whisper requires WAV/PCM format, but browsers typically record in WebM/Opus. Uses stdin/stdout pipes so ffmpeg probes the actual data format
instead of relying on file extensions.
""" """
# Create temp files for input and output
with tempfile.NamedTemporaryFile(suffix=f".{input_format}", delete=False) as in_file:
in_file.write(input_data)
input_path = in_file.name
output_path = input_path.replace(f".{input_format}", ".wav")
try: try:
# Use ffmpeg to convert to WAV (16kHz mono, which Whisper prefers)
result = subprocess.run([ result = subprocess.run([
"ffmpeg", "-y", # Overwrite output "ffmpeg", "-y",
"-i", input_path, # Input file "-i", "pipe:0", # Read from stdin (auto-detect format)
"-ar", "16000", # Sample rate 16kHz "-ar", "16000", # Sample rate 16kHz
"-ac", "1", # Mono "-ac", "1", # Mono
"-c:a", "pcm_s16le", # PCM 16-bit little-endian "-c:a", "pcm_s16le", # PCM 16-bit little-endian
output_path "-f", "wav", # Output format
], capture_output=True, text=True, timeout=30) "pipe:1" # Write to stdout
], input=input_data, capture_output=True, timeout=30)
if result.returncode != 0: if result.returncode != 0:
print(f"[Whisper] ffmpeg error: {result.stderr}") print(f"[Whisper] ffmpeg error: {result.stderr.decode('utf-8', errors='replace')}")
return None return None
# Read the converted WAV file return result.stdout
with open(output_path, "rb") as f:
wav_data = f.read()
return wav_data
except subprocess.TimeoutExpired: except subprocess.TimeoutExpired:
print("[Whisper] ffmpeg conversion timed out") print("[Whisper] ffmpeg conversion timed out")
@@ -65,16 +55,6 @@ def convert_audio_to_wav(input_data: bytes, input_format: str = "webm") -> bytes
except Exception as e: except Exception as e:
print(f"[Whisper] Conversion error: {e}") print(f"[Whisper] Conversion error: {e}")
return None return None
finally:
# Cleanup temp files
try:
os.unlink(input_path)
except:
pass
try:
os.unlink(output_path)
except:
pass
# Configuration # Configuration
HOST = "0.0.0.0" # Listen on all interfaces (needed for Traefik proxy) HOST = "0.0.0.0" # Listen on all interfaces (needed for Traefik proxy)