asi se fue xd

This commit is contained in:
2026-02-18 12:13:22 -06:00
parent d27da30494
commit d0fdd04132
17 changed files with 612 additions and 735 deletions

View File

@@ -11,6 +11,7 @@
"@nucleoriofrio/webmcp": "git+https://gitea.nucleoriofrio.com/nucleo000/webmcp.git",
"@xterm/addon-fit": "^0.11.0",
"@xterm/addon-web-links": "^0.12.0",
"@xterm/addon-webgl": "^0.19.0",
"@xterm/xterm": "^6.0.0",
"pinia": "^3.0.4",
"vite-plugin-pwa": "^1.2.0",
@@ -2624,6 +2625,12 @@
"integrity": "sha512-4Smom3RPyVp7ZMYOYDoC/9eGJJJqYhnPLGGqJ6wOBfB8VxPViJNSKdgRYb8NpaM6YSelEKbA2SStD7lGyqaobw==",
"license": "MIT"
},
"node_modules/@xterm/addon-webgl": {
"version": "0.19.0",
"resolved": "https://registry.npmjs.org/@xterm/addon-webgl/-/addon-webgl-0.19.0.tgz",
"integrity": "sha512-b3fMOsyLVuCeNJWxolACEUED0vm7qC0cy4wRvf3oURSzDTYVQiGPhTnhWZwIHdvC48Y+oLhvYXnY4XDXPoJo6A==",
"license": "MIT"
},
"node_modules/@xterm/xterm": {
"version": "6.0.0",
"resolved": "https://registry.npmjs.org/@xterm/xterm/-/xterm-6.0.0.tgz",

View File

@@ -14,6 +14,7 @@
"@nucleoriofrio/webmcp": "git+https://gitea.nucleoriofrio.com/nucleo000/webmcp.git",
"@xterm/addon-fit": "^0.11.0",
"@xterm/addon-web-links": "^0.12.0",
"@xterm/addon-webgl": "^0.19.0",
"@xterm/xterm": "^6.0.0",
"pinia": "^3.0.4",
"vite-plugin-pwa": "^1.2.0",

View File

@@ -7,8 +7,6 @@ import FloatingTerminal from './components/FloatingTerminal.vue'
import FloatingResponse from './components/FloatingResponse.vue'
import FloatingVoice from './components/FloatingVoice.vue'
import AgentBar from './components/AgentBar.vue'
import HookNotifications from './components/HookNotifications.vue'
import NotificationLog from './components/NotificationLog.vue'
import PwaInstallBanner from './components/PwaInstallBanner.vue'
import { initWebMCP, getWebMCP } from './services/webmcp'
import { initTorch, destroyTorch } from './services/torch'
@@ -18,7 +16,6 @@ import { setTerminalControls } from './services/tools/handlers/terminalHandlers'
import { setResponseControls } from './services/tools/handlers/responseHandlers'
import { useCanvasStore } from './stores/canvas'
import { useProjectCanvasStore } from './stores/projectCanvas'
import { useClaudeHooksStore } from './stores/claude-hooks'
const route = useRoute()
const router = useRouter()
@@ -68,12 +65,9 @@ function clearDebugLogs() {
}
const terminalRef = ref<InstanceType<typeof FloatingTerminal> | null>(null)
const responseRef = ref<InstanceType<typeof FloatingResponse> | null>(null)
const notifLogRef = ref<InstanceType<typeof NotificationLog> | null>(null)
const voiceRef = ref<InstanceType<typeof FloatingVoice> | null>(null)
const canvasStore = useCanvasStore()
const projectCanvasStore = useProjectCanvasStore()
const hooksStore = useClaudeHooksStore()
// Voice FAB push-to-talk state
const voicePTTActive = ref(false)
let voiceTouchStarted = false
@@ -237,15 +231,6 @@ function connectStatusWs() {
}
}
// Rich hook data → toast notifications
if (msg.type === 'claude-hook') {
hooksStore.processHook(msg)
}
// Permission request → persistent toast with allow/deny
if (msg.type === 'claude-permission') {
hooksStore.processPermission(msg)
}
} catch { /* ignore non-JSON messages */ }
}
@@ -332,8 +317,6 @@ onMounted(async () => {
// Setup response controls for MCP tools
setResponseControls({
addMessage: (message: string, type?: 'info' | 'success' | 'warning' | 'error') => {
// Also log to notification log
notifLogRef.value?.addResponseEntry(message, type || 'info')
if (responseRef.value) {
return responseRef.value.addMessage(message, type)
}
@@ -545,11 +528,6 @@ watch(() => route.name, (newPage) => {
<!-- Floating Response (Agent UI messages) -->
<FloatingResponse ref="responseRef" />
<!-- Hook Notifications (toasts from Claude Code hooks) -->
<HookNotifications />
<!-- Notification Log (temporary - collects all notifications, persists to localStorage) -->
<NotificationLog ref="notifLogRef" />
<!-- Floating Voice Input -->
<FloatingVoice ref="voiceRef" v-model="showVoice" />

View File

@@ -99,7 +99,7 @@ onUnmounted(() => {
flex: 1;
position: relative;
min-height: 100%;
overflow: hidden;
overflow: auto;
}
.canvas-placeholder {

View File

@@ -68,31 +68,16 @@ function formatSessionLabel(s: SessionInfo): string {
</select>
</div>
<!-- Voice Mode Toggle -->
<!-- Whisper Status -->
<div class="is-section">
<label class="is-label">Mode</label>
<div class="is-mode-row">
<button
class="is-mode-btn"
:class="{ active: voice.voiceMode.value === 'webspeech' }"
:disabled="voice.isRecording.value"
@click="voice.voiceMode.value !== 'webspeech' && voice.toggleWhisperMode()"
>
<span class="is-mode-icon">Web</span>
<span class="is-mode-label">Speech API</span>
</button>
<button
class="is-mode-btn"
:class="{
active: voice.voiceMode.value === 'whisper',
loading: voice.whisperStatus.value === 'loading'
}"
:disabled="voice.isRecording.value"
@click="voice.voiceMode.value !== 'whisper' && voice.toggleWhisperMode()"
<div
class="is-mode-btn active"
>
<span class="is-mode-icon">GPU</span>
<span class="is-mode-label">Whisper</span>
</button>
</div>
</div>
<div class="is-status">
<span
@@ -104,10 +89,7 @@ function formatSessionLabel(s: SessionInfo): string {
}"
></span>
<span class="is-status-text">
{{ voice.voiceMode.value === 'whisper'
? (voice.whisperStatus.value === 'ready' ? 'Whisper ready' : voice.whisperStatus.value === 'loading' ? 'Starting...' : 'Offline')
: 'Web Speech API'
}}
{{ voice.whisperStatus.value === 'ready' ? 'Whisper ready' : voice.whisperStatus.value === 'loading' ? 'Starting...' : 'Offline' }}
</span>
</div>
</div>

View File

@@ -1,30 +1,11 @@
import { ref, watch, type Ref } from 'vue'
import { endpoints } from '../config/endpoints'
// Web Speech API types (not in default TS lib)
interface SpeechRecognitionEvent extends Event {
resultIndex: number
results: SpeechRecognitionResultList
}
interface SpeechRecognitionErrorEvent extends Event {
error: string
message?: string
}
interface SpeechRecognition extends EventTarget {
continuous: boolean
interimResults: boolean
lang: string
onresult: ((event: SpeechRecognitionEvent) => void) | null
onerror: ((event: SpeechRecognitionErrorEvent) => void) | null
onend: (() => void) | null
start(): void
stop(): void
abort(): void
}
export type VoiceMode = 'webspeech' | 'whisper'
import {
initWhisperSocket,
sendAudio,
onTranscription,
getWhisperStatus,
isConnected
} from '../services/whisperSocket'
export type WhisperStatus = 'offline' | 'loading' | 'ready'
export interface VoiceCapture {
@@ -34,7 +15,7 @@ export interface VoiceCapture {
interimTranscript: Ref<string>
animatedTranscript: Ref<string>
error: Ref<string>
voiceMode: Ref<VoiceMode>
voiceMode: Ref<'whisper'>
whisperStatus: Ref<WhisperStatus>
audioDevices: Ref<MediaDeviceInfo[]>
selectedDeviceId: Ref<string>
@@ -44,9 +25,7 @@ export interface VoiceCapture {
// Actions
startRecording: () => void
stopRecording: () => void
toggleWhisperMode: () => Promise<void>
checkWhisperStatus: () => Promise<any>
loadAudioDevices: () => Promise<void>
loadAudioDevices: (skipPermission?: boolean) => Promise<void>
selectMicrophone: (deviceId: string) => void
playLastAudio: () => void
init: () => Promise<void>
@@ -54,6 +33,8 @@ export interface VoiceCapture {
clearTranscript: () => void
}
const GPU_TIMEOUT_MS = 30_000 // 30s timeout waiting for GPU
export function useVoiceCapture(options?: {
onNotification?: (message: string, type: 'info' | 'success' | 'error', duration?: number) => void
}): VoiceCapture {
@@ -65,290 +46,97 @@ export function useVoiceCapture(options?: {
const interimTranscript = ref('')
const animatedTranscript = ref('')
const error = ref('')
const voiceMode = ref<VoiceMode>('webspeech')
const whisperStatus = ref<WhisperStatus>('offline')
const voiceMode = ref<'whisper'>('whisper') // Always whisper, no web speech
const audioDevices = ref<MediaDeviceInfo[]>([])
const selectedDeviceId = ref('')
const isAndroid = ref(false)
// Audio debug & save
const lastAudioUrl = ref('')
const isPlayingAudio = ref(false)
// ====== Internal state ======
let recognition: SpeechRecognition | null = null
let lastProcessedResult = ''
// Typing animation
let typingTimeout: number | null = null
let lastAnimatedLength = 0
// Whisper
const WHISPER_WS_URL = endpoints.whisper
let whisperSocket: WebSocket | null = null
// ====== Internal ======
const sharedWhisperStatus = getWhisperStatus()
const whisperStatus = ref<WhisperStatus>(sharedWhisperStatus.value)
let mediaRecorder: MediaRecorder | null = null
let audioChunks: Blob[] = []
let chunkInterval: number | null = null
const CHUNK_INTERVAL_MS = 3000
let mediaStream: MediaStream | null = null
let supportedMimeType = 'audio/webm;codecs=opus'
// Audio playback debug
let audioElement: HTMLAudioElement | null = null
let recordingStartTime = 0
let unsubTranscription: (() => void) | null = null
let gpuTimeout: number | null = null
// Typing animation
let typingTimeout: number | null = null
let lastAnimatedLength = 0
// Keep local status in sync with shared
watch(sharedWhisperStatus, (val) => {
whisperStatus.value = val
})
// ====== Mobile / Audio Format ======
function checkMobile() {
const ua = navigator.userAgent
isAndroid.value = /Android/i.test(ua)
isAndroid.value = /Android/i.test(navigator.userAgent)
}
function detectAudioFormat(): string {
const formats = [
'audio/webm;codecs=opus',
'audio/webm',
'audio/mp4',
'audio/mp4;codecs=mp4a.40.2',
'audio/aac',
'audio/ogg;codecs=opus',
'audio/wav'
'audio/webm;codecs=opus', 'audio/webm',
'audio/mp4', 'audio/mp4;codecs=mp4a.40.2',
'audio/aac', 'audio/ogg;codecs=opus', 'audio/wav'
]
for (const format of formats) {
if (MediaRecorder.isTypeSupported(format)) {
console.log(`[VoiceCapture] Using audio format: ${format}`)
return format
for (const f of formats) {
if (MediaRecorder.isTypeSupported(f)) {
console.log(`[VoiceCapture] Audio format: ${f}`)
return f
}
}
console.warn('[VoiceCapture] No preferred format supported, using default')
return ''
}
// ====== Web Speech API ======
// ====== Whisper transcription handler ======
function initRecognition(): SpeechRecognition | null {
const SpeechRecognitionCtor = (window as any).SpeechRecognition || (window as any).webkitSpeechRecognition
if (!SpeechRecognitionCtor) {
error.value = 'Speech recognition not supported in this browser'
return null
}
function handleTranscription(msg: any) {
if (!isRecording.value) return
const rec: SpeechRecognition = new SpeechRecognitionCtor()
rec.continuous = !isAndroid.value
rec.interimResults = true
rec.lang = 'es-419'
if (isAndroid.value) {
console.log('[VoiceCapture] Android detected - using non-continuous mode')
}
rec.onresult = (event: SpeechRecognitionEvent) => {
let interim = ''
let final = ''
for (let i = event.resultIndex; i < event.results.length; i++) {
const result = event.results[i]
if (!result || !result[0]) continue
if (result.isFinal) {
final += result[0].transcript + ' '
} else {
interim += result[0].transcript
}
if (msg.success && msg.text) {
const fullText = msg.text.trim()
transcript.value = fullText + ' '
interimTranscript.value = ''
if (!msg.partial) {
console.log(`[VoiceCapture] WHISPER (${msg.model}/${msg.device}):`, fullText)
}
if (final) {
const trimmedFinal = final.trim()
if (isAndroid.value && lastProcessedResult && trimmedFinal.startsWith(lastProcessedResult.trim())) {
const newPart = trimmedFinal.slice(lastProcessedResult.trim().length).trim()
if (newPart) {
transcript.value += newPart + ' '
lastProcessedResult = trimmedFinal
}
} else {
transcript.value += final
lastProcessedResult = trimmedFinal
}
}
interimTranscript.value = interim
}
rec.onerror = (event: SpeechRecognitionErrorEvent) => {
// no-speech and aborted are transient — don't kill the session
if (event.error === 'no-speech' || event.error === 'aborted') {
console.log('[VoiceCapture] Transient error:', event.error, '(will auto-restart)')
return
}
console.error('[VoiceCapture] Recognition error:', event.error)
if (event.error === 'not-allowed') {
error.value = 'Microphone access denied'
} else {
error.value = `Error: ${event.error}`
}
isRecording.value = false
}
rec.onend = () => {
if (isRecording.value && voiceMode.value === 'webspeech') {
if (isAndroid.value) {
isRecording.value = false
console.log('[VoiceCapture] Android session ended - tap mic to continue')
} else {
rec.start()
}
}
}
return rec
}
// ====== Whisper Functions ======
async function checkWhisperStatusFn(updateLoading = true): Promise<any> {
try {
const res = await fetch('/api/whisper/status')
const data = await res.json()
if (data.enabled) {
voiceMode.value = 'whisper'
}
if (data.running) {
whisperStatus.value = 'ready'
} else if (updateLoading && (data.starting || false)) {
whisperStatus.value = 'loading'
} else if (!data.running) {
if (voiceMode.value === 'whisper' && !data.starting) {
whisperStatus.value = 'offline'
}
}
return data
} catch {
voiceMode.value = 'webspeech'
whisperStatus.value = 'offline'
return null
} else if (msg.error) {
error.value = msg.error
console.error('[VoiceCapture] Whisper error:', msg.error)
}
}
async function pollWhisperStatus(): Promise<void> {
const maxAttempts = 60
let attempts = 0
// ====== Recording ======
while (attempts < maxAttempts) {
await new Promise(resolve => setTimeout(resolve, 2000))
attempts++
function startRecording() {
error.value = ''
try {
const status = await checkWhisperStatusFn(false)
if (!status) continue
// Start capturing audio immediately, regardless of GPU status
startMediaRecorder()
if (status.starting) {
console.log(`[VoiceCapture] Still starting... (${attempts * 2}s)`)
continue
// If GPU not ready yet, start timeout
if (!isConnected()) {
console.log('[VoiceCapture] Recording started, waiting for GPU...')
gpuTimeout = window.setTimeout(() => {
if (isRecording.value && !isConnected()) {
error.value = 'Whisper GPU timeout — server not available'
notify('Whisper GPU not available', 'error')
stopRecording()
}
if (status.running && status.enabled) {
console.log('[VoiceCapture] Server ready!')
notify('Whisper GPU ready!', 'success')
connectWhisperSocket()
whisperStatus.value = 'ready'
return
}
console.log('[VoiceCapture] Server failed to start')
notify('Whisper server failed to start', 'error')
whisperStatus.value = 'offline'
return
} catch (e) {
console.error('[VoiceCapture] Polling error:', e)
}
}
notify('Whisper server timeout', 'error')
whisperStatus.value = 'offline'
}
function connectWhisperSocket() {
if (whisperStatus.value !== 'ready') {
console.log('[VoiceCapture] Whisper not ready, skipping connection')
return
}
if (whisperSocket?.readyState === WebSocket.OPEN) return
console.log('[VoiceCapture] Connecting to Whisper at:', WHISPER_WS_URL)
whisperSocket = new WebSocket(WHISPER_WS_URL)
const connectionTimeout = setTimeout(() => {
if (whisperSocket && whisperSocket.readyState !== WebSocket.OPEN) {
console.error('[VoiceCapture] Whisper connection timeout (10s)')
whisperSocket.close()
whisperStatus.value = 'offline'
}
}, 10000)
whisperSocket.onopen = () => {
clearTimeout(connectionTimeout)
console.log('[VoiceCapture] Whisper WebSocket connected')
whisperStatus.value = 'ready'
}
whisperSocket.onmessage = (event) => {
try {
const msg = JSON.parse(event.data)
if (msg.type === 'ready') {
console.log('[VoiceCapture] Whisper ready:', msg.model, msg.device)
whisperStatus.value = 'ready'
} else if (msg.type === 'transcription') {
if (msg.success && msg.text) {
const fullText = msg.text.trim()
if (msg.partial) {
transcript.value = fullText + ' '
interimTranscript.value = ''
} else {
transcript.value = fullText + ' '
interimTranscript.value = ''
console.log(`[VoiceCapture] WHISPER-GPU (${msg.model}/${msg.device}):`, fullText)
}
} else if (msg.error) {
error.value = msg.error
console.error('[VoiceCapture] Whisper error:', msg.error)
}
}
} catch (e) {
console.error('[VoiceCapture] Whisper message error:', e)
}
}
whisperSocket.onclose = () => {
console.log('[VoiceCapture] Whisper WebSocket closed')
whisperStatus.value = 'offline'
}
whisperSocket.onerror = (e) => {
console.error('[VoiceCapture] Whisper WebSocket error:', e)
whisperStatus.value = 'offline'
}, GPU_TIMEOUT_MS)
}
}
function disconnectWhisperSocket() {
if (whisperSocket) {
whisperSocket.close()
whisperSocket = null
}
whisperStatus.value = 'offline'
}
async function startWhisperRecording() {
if (!whisperSocket || whisperSocket.readyState !== WebSocket.OPEN) {
console.warn('[VoiceCapture] Whisper socket not connected, attempting to connect...')
connectWhisperSocket()
await new Promise(resolve => setTimeout(resolve, 500))
if (!whisperSocket || whisperSocket.readyState !== WebSocket.OPEN) {
error.value = 'Whisper server not connected'
notify('Whisper not connected. Try toggling GPU mode.', 'error')
return
}
}
async function startMediaRecorder() {
try {
const audioConstraints: MediaTrackConstraints = {
echoCancellation: true,
@@ -375,17 +163,21 @@ export function useVoiceCapture(options?: {
}
}
audioChunks = []
mediaRecorder.start(100)
isRecording.value = true
recordingStartTime = Date.now()
console.log(`[VoiceCapture] Whisper recording started`)
// Permission granted via user gesture — reload devices with labels
// Reload devices with labels now that we have permission
loadAudioDevices(true)
// Send chunks periodically — only when GPU is connected
chunkInterval = window.setInterval(() => {
if (audioChunks.length > 0 && whisperSocket?.readyState === WebSocket.OPEN) {
if (audioChunks.length > 0 && isConnected()) {
// GPU came online — clear timeout if still pending
if (gpuTimeout) {
clearTimeout(gpuTimeout)
gpuTimeout = null
}
sendAudioChunk(false)
}
}, CHUNK_INTERVAL_MS)
@@ -397,6 +189,10 @@ export function useVoiceCapture(options?: {
function sendAudioChunk(isFinal: boolean) {
if (audioChunks.length === 0) return
if (!isConnected()) {
console.log('[VoiceCapture] GPU not connected, holding audio')
return
}
const mimeType = mediaRecorder?.mimeType || supportedMimeType || 'audio/webm'
const audioBlob = new Blob(audioChunks, { type: mimeType })
@@ -414,24 +210,22 @@ export function useVoiceCapture(options?: {
const reader = new FileReader()
reader.onloadend = () => {
const base64 = (reader.result as string).split(',')[1]
if (whisperSocket?.readyState === WebSocket.OPEN) {
whisperSocket.send(JSON.stringify({
type: 'transcribe',
audio: base64,
language: 'es',
partial: !isFinal
}))
}
sendAudio(base64, 'es', !isFinal)
}
reader.readAsDataURL(audioBlob)
}
function stopWhisperRecording() {
function stopRecording() {
if (gpuTimeout) {
clearTimeout(gpuTimeout)
gpuTimeout = null
}
if (chunkInterval) {
clearInterval(chunkInterval)
chunkInterval = null
}
// Send final chunk (only if GPU is connected)
if (audioChunks.length > 0) {
sendAudioChunk(true)
}
@@ -439,16 +233,16 @@ export function useVoiceCapture(options?: {
if (mediaRecorder && mediaRecorder.state !== 'inactive') {
mediaRecorder.stop()
}
if (mediaStream) {
mediaStream.getTracks().forEach(track => track.stop())
mediaStream = null
}
isRecording.value = false
interimTranscript.value = ''
}
// ====== Audio Save & Debug Playback ======
// ====== Audio Save & Playback ======
function currentMicName(): string {
if (!selectedDeviceId.value) return 'Default'
@@ -457,9 +251,7 @@ export function useVoiceCapture(options?: {
}
function saveAudioForPlayback(blob: Blob) {
if (lastAudioUrl.value) {
URL.revokeObjectURL(lastAudioUrl.value)
}
if (lastAudioUrl.value) URL.revokeObjectURL(lastAudioUrl.value)
lastAudioUrl.value = URL.createObjectURL(blob)
saveRecordingToBackend(blob)
}
@@ -468,7 +260,6 @@ export function useVoiceCapture(options?: {
try {
const duration_ms = Date.now() - recordingStartTime
const reader = new FileReader()
reader.onloadend = async () => {
const base64 = (reader.result as string).split(',')[1]
const response = await fetch('/api/recordings', {
@@ -483,12 +274,9 @@ export function useVoiceCapture(options?: {
})
const data = await response.json()
if (data.success) {
console.log(`[VoiceCapture] Recording saved: ${data.filename} (${(data.size / 1024).toFixed(1)} KB)`)
} else {
console.error('[VoiceCapture] Failed to save recording:', data.error)
console.log(`[VoiceCapture] Recording saved: ${data.filename}`)
}
}
reader.readAsDataURL(blob)
} catch (e) {
console.error('[VoiceCapture] Error saving recording:', e)
@@ -497,163 +285,17 @@ export function useVoiceCapture(options?: {
function playLastAudio() {
if (!lastAudioUrl.value) return
if (isPlayingAudio.value && audioElement) {
audioElement.pause()
audioElement.currentTime = 0
isPlayingAudio.value = false
return
}
audioElement = new Audio(lastAudioUrl.value)
audioElement.onplay = () => { isPlayingAudio.value = true }
audioElement.onended = () => { isPlayingAudio.value = false }
audioElement.onpause = () => { isPlayingAudio.value = false }
audioElement.play().catch(e => {
console.error('[VoiceCapture] Failed to play audio:', e)
isPlayingAudio.value = false
})
}
// ====== Parallel Audio Capture (for Web Speech mode) ======
async function startAudioCapture() {
try {
const audioConstraints: MediaTrackConstraints = {
echoCancellation: true,
noiseSuppression: true,
autoGainControl: true,
...(selectedDeviceId.value ? { deviceId: { exact: selectedDeviceId.value } } : {})
}
mediaStream = await navigator.mediaDevices.getUserMedia({ audio: audioConstraints })
const recorderOptions: MediaRecorderOptions = {}
if (supportedMimeType) {
recorderOptions.mimeType = supportedMimeType
}
mediaRecorder = new MediaRecorder(mediaStream, recorderOptions)
audioChunks = []
mediaRecorder.ondataavailable = (event) => {
if (event.data.size > 0) {
audioChunks.push(event.data)
}
}
mediaRecorder.start(100)
recordingStartTime = Date.now()
console.log(`[VoiceCapture] Audio capture started (${mediaRecorder.mimeType})`)
// Permission is now granted via user gesture — reload devices with labels
loadAudioDevices(true)
} catch (e: any) {
console.error('[VoiceCapture] Audio capture error:', e)
}
}
function stopAudioCapture() {
if (mediaRecorder && mediaRecorder.state !== 'inactive') {
mediaRecorder.stop()
}
// Build final blob and save
if (audioChunks.length > 0) {
const mimeType = mediaRecorder?.mimeType || supportedMimeType || 'audio/webm'
const audioBlob = new Blob(audioChunks, { type: mimeType })
audioChunks = []
if (audioBlob.size > 1000) {
saveAudioForPlayback(audioBlob)
}
}
if (mediaStream) {
mediaStream.getTracks().forEach(track => track.stop())
mediaStream = null
}
}
// ====== Public Recording API ======
function startRecording() {
error.value = ''
if (voiceMode.value === 'whisper' && whisperStatus.value === 'ready') {
startWhisperRecording()
} else {
if (!recognition) {
recognition = initRecognition()
}
if (recognition) {
try {
recognition.start()
isRecording.value = true
// Capture raw audio in parallel for save/debug playback
startAudioCapture()
if (isAndroid.value) {
notify('Android: Tap mic again to continue recording', 'info', 3000)
}
} catch (e) {
console.error('[VoiceCapture] Failed to start:', e)
}
}
}
}
function stopRecording() {
if (voiceMode.value === 'whisper') {
stopWhisperRecording()
} else {
if (recognition) {
recognition.stop()
}
stopAudioCapture()
isRecording.value = false
}
interimTranscript.value = ''
}
async function toggleWhisperMode() {
if (whisperStatus.value === 'loading') return
whisperStatus.value = 'loading'
error.value = ''
if (voiceMode.value !== 'whisper') {
notify('Starting Whisper GPU server...', 'info', 10000)
}
try {
const res = await fetch('/api/whisper/toggle', { method: 'POST' })
const data = await res.json()
if (data.starting) {
console.log('[VoiceCapture] Server starting, polling...')
voiceMode.value = 'whisper'
await pollWhisperStatus()
return
}
if (data.enabled) {
voiceMode.value = 'whisper'
whisperStatus.value = data.running ? 'ready' : 'offline'
if (data.running) {
notify('Whisper GPU ready!', 'success')
connectWhisperSocket()
}
} else {
voiceMode.value = 'webspeech'
whisperStatus.value = 'offline'
notify('Using Web Speech API', 'info')
disconnectWhisperSocket()
}
} catch (e: any) {
error.value = 'Failed to toggle Whisper'
notify('Error starting Whisper server', 'error')
console.error('[VoiceCapture] Whisper toggle error:', e)
whisperStatus.value = 'offline'
}
audioElement.play().catch(() => { isPlayingAudio.value = false })
}
// ====== Microphone ======
@@ -661,18 +303,14 @@ export function useVoiceCapture(options?: {
async function loadAudioDevices(skipPermissionRequest = false) {
try {
if (!skipPermissionRequest) {
// Request permission to get device labels
const tempStream = await navigator.mediaDevices.getUserMedia({ audio: true })
tempStream.getTracks().forEach(track => track.stop())
}
const devices = await navigator.mediaDevices.enumerateDevices()
audioDevices.value = devices.filter(d => d.kind === 'audioinput')
if (!selectedDeviceId.value && audioDevices.value.length > 0) {
selectedDeviceId.value = audioDevices.value[0]?.deviceId || ''
}
console.log(`[VoiceCapture] Found ${audioDevices.value.length} audio devices`)
} catch (e) {
console.error('[VoiceCapture] Failed to enumerate devices:', e)
}
@@ -689,36 +327,26 @@ export function useVoiceCapture(options?: {
// ====== Typing Animation ======
function animateTyping(targetText: string) {
if (typingTimeout) {
clearTimeout(typingTimeout)
typingTimeout = null
}
if (typingTimeout) { clearTimeout(typingTimeout); typingTimeout = null }
if (targetText.length < animatedTranscript.value.length) {
animatedTranscript.value = targetText
lastAnimatedLength = targetText.length
return
}
const startIndex = lastAnimatedLength
function typeNext(index: number) {
if (index <= targetText.length) {
animatedTranscript.value = targetText.substring(0, index)
lastAnimatedLength = index
if (index < targetText.length) {
const delay = 15 + Math.random() * 10
typingTimeout = window.setTimeout(() => typeNext(index + 1), delay)
typingTimeout = window.setTimeout(() => typeNext(index + 1), 15 + Math.random() * 10)
}
}
}
typeNext(startIndex)
}
watch(transcript, (newVal) => {
animateTyping(newVal)
})
watch(transcript, (v) => animateTyping(v))
// ====== Transcript ======
@@ -727,80 +355,43 @@ export function useVoiceCapture(options?: {
interimTranscript.value = ''
animatedTranscript.value = ''
lastAnimatedLength = 0
lastProcessedResult = ''
if (typingTimeout) {
clearTimeout(typingTimeout)
typingTimeout = null
}
if (typingTimeout) { clearTimeout(typingTimeout); typingTimeout = null }
}
// ====== Lifecycle ======
async function init() {
recognition = initRecognition()
checkMobile()
supportedMimeType = detectAudioFormat()
// Only enumerate without getUserMedia — no user gesture here
// Devices will get full labels after first recording (user gesture)
await loadAudioDevices(true)
const status = await checkWhisperStatusFn()
if (status?.starting) {
console.log('[VoiceCapture] Server is starting, resuming polling...')
pollWhisperStatus()
} else if (voiceMode.value === 'whisper' && whisperStatus.value === 'ready') {
connectWhisperSocket()
} else if (voiceMode.value === 'whisper' && whisperStatus.value !== 'ready') {
console.log('[VoiceCapture] Whisper was enabled but server not running, disabling')
voiceMode.value = 'webspeech'
// Subscribe to shared whisper transcriptions
if (!unsubTranscription) {
unsubTranscription = onTranscription(handleTranscription)
}
// Initialize shared Whisper socket (singleton, safe to call multiple times)
initWhisperSocket()
console.log('[VoiceCapture] Initialized (Whisper-only, record-first)')
}
function cleanup() {
stopRecording()
recognition = null
disconnectWhisperSocket()
if (unsubTranscription) { unsubTranscription(); unsubTranscription = null }
if (chunkInterval) clearInterval(chunkInterval)
if (typingTimeout) clearTimeout(typingTimeout)
if (mediaStream) {
mediaStream.getTracks().forEach(track => track.stop())
mediaStream = null
}
if (audioElement) {
audioElement.pause()
audioElement = null
}
if (lastAudioUrl.value) {
URL.revokeObjectURL(lastAudioUrl.value)
lastAudioUrl.value = ''
}
if (gpuTimeout) clearTimeout(gpuTimeout)
if (mediaStream) { mediaStream.getTracks().forEach(t => t.stop()); mediaStream = null }
if (audioElement) { audioElement.pause(); audioElement = null }
if (lastAudioUrl.value) { URL.revokeObjectURL(lastAudioUrl.value); lastAudioUrl.value = '' }
isPlayingAudio.value = false
}
return {
// State
isRecording,
transcript,
interimTranscript,
animatedTranscript,
error,
voiceMode,
whisperStatus,
audioDevices,
selectedDeviceId,
isAndroid,
lastAudioUrl,
isPlayingAudio,
// Actions
startRecording,
stopRecording,
toggleWhisperMode,
checkWhisperStatus: checkWhisperStatusFn,
loadAudioDevices,
selectMicrophone,
playLastAudio,
init,
cleanup,
clearTranscript
isRecording, transcript, interimTranscript, animatedTranscript,
error, voiceMode, whisperStatus, audioDevices, selectedDeviceId,
isAndroid, lastAudioUrl, isPlayingAudio,
startRecording, stopRecording, loadAudioDevices, selectMicrophone,
playLastAudio, init, cleanup, clearTranscript
}
}

View File

@@ -71,11 +71,15 @@ export function createComponentHandlers(): ToolConfig[] {
properties: {
id: { type: 'string', description: 'ID del componente' },
componentProps: { type: 'object', description: 'Props para el componente' },
mode: { type: 'string', enum: ['replace', 'append'], description: 'Modo' }
mode: { type: 'string', enum: ['replace', 'append'], description: 'Modo' },
x: { type: 'number', description: 'Posicion X inicial' },
y: { type: 'number', description: 'Posicion Y inicial' },
width: { type: 'number', description: 'Ancho inicial' },
height: { type: 'number', description: 'Alto inicial' }
},
required: ['id']
},
handler: async (args: { id: string; componentProps?: Record<string, any>; mode?: string }) => {
handler: async (args: { id: string; componentProps?: Record<string, any>; mode?: string; x?: number; y?: number; width?: number; height?: number }) => {
try {
const definition = await componentsApi.getById(args.id)
if (!definition) {
@@ -88,7 +92,8 @@ export function createComponentHandlers(): ToolConfig[] {
removePlaceholder(container)
const isAppend = args.mode === 'append'
const result = renderInlineComponent(definition, container, args.componentProps || {}, isAppend)
const layout = { x: args.x, y: args.y, width: args.width, height: args.height }
const result = renderInlineComponent(definition, container, args.componentProps || {}, isAppend, layout)
// Track definition for snapshot capture
getWindowDefinitions().set(definition.id, {
@@ -168,11 +173,13 @@ export function createComponentHandlers(): ToolConfig[] {
type: 'array',
items: { type: 'string', enum: ['template', 'setup', 'style', 'props', 'imports'] },
description: 'Campos a leer (default: template, setup, style)'
}
},
offset: { type: 'number', description: 'Linea inicial (1-based)' },
limit: { type: 'number', description: 'Numero de lineas a leer' }
},
required: ['id']
},
handler: async (args: { id: string; fields?: string[] }) => {
handler: async (args: { id: string; fields?: string[]; offset?: number; limit?: number }) => {
try {
const definition = await componentsApi.getById(args.id)
if (!definition) return `Error: "${args.id}" not found`
@@ -191,7 +198,14 @@ export function createComponentHandlers(): ToolConfig[] {
output.push(`--- ${field} ---\n${arr?.length ? JSON.stringify(arr) : '(empty)'}`)
} else {
const str = (value as string) || ''
output.push(`--- ${field} (${str.length}) ---\n${str || '(empty)'}`)
const lines = str.split('\n')
const total = lines.length
const start = Math.max(0, (args.offset || 1) - 1)
const end = args.limit ? start + args.limit : total
const sliced = lines.slice(start, end)
const numbered = sliced.map((l, i) => `${String(start + i + 1).padStart(4)}\t${l}`).join('\n')
const rangeInfo = (args.offset || args.limit) ? ` lines ${start + 1}-${Math.min(end, total)}/${total}` : ` ${total} lines`
output.push(`--- ${field}${rangeInfo} ---\n${numbered || '(empty)'}`)
}
}

View File

@@ -0,0 +1,177 @@
/**
* Singleton Whisper WebSocket Service
* One shared connection used by all voice components (FloatingVoice, useVoiceCapture, etc.)
*/
import { ref } from 'vue'
import { endpoints } from '../config/endpoints'
/** Status values held by the shared `status` ref that getWhisperStatus() returns. */
export type WhisperStatus = 'offline' | 'loading' | 'ready'
/**
 * Listener signature for onTranscription(). It receives each parsed socket
 * message whose `type` is 'transcription'; every field is optional because the
 * payload comes straight from JSON.parse and is not validated here.
 */
type TranscriptionCallback = (msg: {
success?: boolean
text?: string
error?: string
partial?: boolean
model?: string
device?: string
}) => void
// ====== Singleton state ======
// Module-level, so every importer shares the same values.
// Reactive connection status; starts as 'loading'.
const status = ref<WhisperStatus>('loading')
// The one shared WebSocket, or null when no socket is currently held.
let socket: WebSocket | null = null
// Handle of the pending reconnect timer, or null when none is scheduled.
let reconnectTimer: number | null = null
// Callbacks registered through onTranscription().
const listeners = new Set<TranscriptionCallback>()
// ====== Connection management ======
/**
 * Open the shared WebSocket to `endpoints.whisper`, unless one is already
 * open or in the middle of connecting.
 *
 * The connection timer and all event handlers are bound to the socket
 * instance created by this call (`ws`), not to the module-level `socket`
 * variable. A late timer or event from an older socket therefore cannot close
 * a newer connection attempt, null out a newer socket, or overwrite its status.
 */
function connect() {
  if (socket?.readyState === WebSocket.OPEN || socket?.readyState === WebSocket.CONNECTING) return

  console.log('[WhisperSocket] Connecting to', endpoints.whisper)
  const ws = new WebSocket(endpoints.whisper)
  socket = ws

  // True once a *different* socket has become the shared connection.
  // `socket === null` is deliberately not treated as superseded: reconnect()
  // nulls `socket` right after calling close(), and in that case the close
  // event must still move the status to 'loading' and schedule a reconnect.
  const isSuperseded = () => socket !== null && socket !== ws

  const timeout = setTimeout(() => {
    // Only a socket that is still connecting has timed out; one that is
    // already closing or closed is handled by its close event.
    if (ws.readyState === WebSocket.CONNECTING) {
      console.error('[WhisperSocket] Connection timeout (10s)')
      ws.close()
      if (!isSuperseded()) status.value = 'loading'
    }
  }, 10000)

  ws.onopen = () => {
    clearTimeout(timeout)
    if (isSuperseded()) {
      // A newer socket owns the connection; do not keep a second one open.
      ws.close()
      return
    }
    console.log('[WhisperSocket] Connected')
    status.value = 'ready'
  }

  ws.onmessage = (event) => {
    let msg
    try {
      msg = JSON.parse(event.data)
    } catch (e) {
      console.error('[WhisperSocket] Message parse error:', e)
      return
    }
    if (msg?.type === 'ready') {
      console.log('[WhisperSocket] Server ready:', msg.model, msg.device)
      status.value = 'ready'
    } else if (msg?.type === 'transcription') {
      // Broadcast to all listeners. Each call is isolated so that one throwing
      // listener neither starves the others nor gets reported as a parse error.
      for (const cb of listeners) {
        try {
          cb(msg)
        } catch (e) {
          console.error('[WhisperSocket] Transcription listener error:', e)
        }
      }
    }
  }

  ws.onclose = () => {
    // The socket is gone, so its connection timer has nothing left to guard.
    clearTimeout(timeout)
    if (isSuperseded()) return
    console.log('[WhisperSocket] Closed, will reconnect...')
    socket = null
    status.value = 'loading'
    scheduleReconnect()
  }

  ws.onerror = (e) => {
    console.error('[WhisperSocket] Error:', e)
    if (!isSuperseded()) status.value = 'loading'
  }
}
/**
 * Arm a one-shot 2s timer that re-runs checkStatusAndConnect().
 * Does nothing while a timer is already pending, so repeated calls collapse
 * into a single scheduled attempt.
 */
function scheduleReconnect() {
  if (!reconnectTimer) {
    const retry = () => {
      // Clear the handle first so the retry itself may schedule the next one.
      reconnectTimer = null
      checkStatusAndConnect()
    }
    reconnectTimer = window.setTimeout(retry, 2000)
  }
}
/**
 * Ask `/api/whisper/status` whether the server is running and connect if so.
 * In every other case (not running, request failed, unreadable response) the
 * status is set to 'loading' and another attempt is scheduled.
 */
async function checkStatusAndConnect() {
  try {
    const response = await fetch('/api/whisper/status')
    const { running } = await response.json()
    if (running) {
      connect()
      return
    }
  } catch {
    // Fall through to the shared "not available yet" path below.
  }
  status.value = 'loading'
  scheduleReconnect()
}
// ====== Public API ======
/**
 * Start the singleton connection (intended to be called at app startup).
 * Fire-and-forget: the status check runs in the background and the caller
 * gets no promise back.
 */
export function initWhisperSocket() {
  void checkStatusAndConnect()
}
/**
 * Send one base64-encoded audio payload for transcription.
 * If the shared socket is not open, the audio is dropped with a warning.
 *
 * @param base64 Audio data, base64-encoded.
 * @param language Language value forwarded as-is in the request.
 * @param partial Forwarded as-is in the request's `partial` field.
 */
export function sendAudio(base64: string, language: string, partial: boolean) {
  const ws = socket
  if (ws === null || ws.readyState !== WebSocket.OPEN) {
    console.warn('[WhisperSocket] Not connected, dropping audio')
    return
  }
  const request = {
    type: 'transcribe',
    audio: base64,
    language,
    partial
  }
  ws.send(JSON.stringify(request))
}
/**
 * Register a callback for transcription messages.
 *
 * @param callback Invoked with every 'transcription' message from the socket.
 * @returns A function that removes this callback again.
 */
export function onTranscription(callback: TranscriptionCallback): () => void {
  listeners.add(callback)
  const unsubscribe = () => listeners.delete(callback)
  return unsubscribe
}
/**
 * Return the shared reactive status ref.
 * This is the module's own ref, not a copy, so all callers observe the same
 * value.
 */
export function getWhisperStatus() {
  return status
}
/** Report whether a shared socket exists and is currently in the OPEN state. */
export function isConnected(): boolean {
  return socket !== null && socket.readyState === WebSocket.OPEN
}
/**
 * Force a reconnect (e.g. when the user toggles Whisper).
 *
 * Skipped while a connection attempt is already in flight. Otherwise the
 * current socket is closed and `/api/whisper/toggle` is POSTed:
 * - if the server reports `running`, connect immediately;
 * - if not, poll `/api/whisper/status` every 2s for up to 60 attempts in the
 *   background, setting the status to 'offline' once they are used up;
 * - if the toggle request itself fails, fall back to the regular reconnect
 *   schedule.
 */
export async function reconnect() {
  const attemptInFlight = status.value === 'loading' && socket?.readyState === WebSocket.CONNECTING
  if (attemptInFlight) return

  status.value = 'loading'
  if (socket) {
    socket.close()
    socket = null
  }

  // Poll until ready
  const waitUntilRunning = async () => {
    let attemptsLeft = 60
    while (attemptsLeft-- > 0) {
      await new Promise(resolve => setTimeout(resolve, 2000))
      try {
        const statusResponse = await fetch('/api/whisper/status')
        const current = await statusResponse.json()
        if (current.running) {
          connect()
          return
        }
      } catch { /* retry */ }
    }
    status.value = 'offline'
  }

  try {
    const toggleResponse = await fetch('/api/whisper/toggle', { method: 'POST' })
    const toggled = await toggleResponse.json()
    if (toggled.running) {
      connect()
      return
    }
    // Not awaited on purpose: polling continues after reconnect() resolves.
    void waitUntilRunning()
  } catch {
    status.value = 'loading'
    scheduleReconnect()
  }
}