diff --git a/frontend/src/components/FloatingTerminal.vue b/frontend/src/components/FloatingTerminal.vue
index 128ca07..538e25c 100644
--- a/frontend/src/components/FloatingTerminal.vue
+++ b/frontend/src/components/FloatingTerminal.vue
@@ -250,13 +250,38 @@ function initTerminal() {
     }
   })
 
-  // Capture Ctrl+E even when terminal has focus
+  // Capture Ctrl+E, Ctrl+V and Ctrl+C when terminal has focus
   terminal.attachCustomKeyEventHandler((e) => {
+    // Ctrl+E: Toggle terminal
     if (e.ctrlKey && e.key === 'e') {
       e.preventDefault()
       toggleTerminal()
-      return false // Prevent terminal from processing
+      return false
     }
+
+    // Ctrl+V: Paste from clipboard
+    if (e.ctrlKey && e.key === 'v' && e.type === 'keydown') {
+      e.preventDefault()
+      navigator.clipboard.readText().then((text) => {
+        if (text && socket && socket.readyState === WebSocket.OPEN) {
+          socket.send(JSON.stringify({ type: 'input', data: text }))
+        }
+      }).catch((err) => {
+        console.error('[Terminal] Clipboard read failed:', err)
+      })
+      return false
+    }
+
+    // Ctrl+C: Copy selection (if any)
+    if (e.ctrlKey && e.key === 'c' && e.type === 'keydown') {
+      const selection = terminal?.getSelection()
+      if (selection) {
+        navigator.clipboard.writeText(selection).catch(console.error)
+        return false
+      }
+      // If no selection, let Ctrl+C pass through as SIGINT
+    }
+
     return true // Let terminal handle other keys
   })
 }
diff --git a/frontend/src/components/FloatingVoice.vue b/frontend/src/components/FloatingVoice.vue
index 10845c0..24cc6f0 100644
--- a/frontend/src/components/FloatingVoice.vue
+++ b/frontend/src/components/FloatingVoice.vue
@@ -30,19 +30,32 @@ const isDragging = ref(false)
 const dragOffset = ref({ x: 0, y: 0 })
 const containerRef = ref(null)
 
-// Speech recognition
+// Speech recognition (Web Speech API)
 let recognition: SpeechRecognition | null = null
 
-// WebSocket connection (own session)
+// WebSocket connection to terminal
 const WS_URL = `ws://${window.location.hostname}:4103`
 let socket: WebSocket | null = null
 const connected = ref(false)
 
-// Push-to-talk state (Ctrl+S)
+// Push-to-talk state (Ctrl+Space)
 let keyDownTime = 0
 let holdTimeout: number | null = null
 const isPushToTalk = ref(false)
 
+// ============ WHISPER MODE ============
+const useWhisper = ref(false)
+const whisperReady = ref(false)
+const whisperLoading = ref(false)
+const WHISPER_WS_URL = `ws://${window.location.hostname}:4104`
+let whisperSocket: WebSocket | null = null
+let mediaRecorder: MediaRecorder | null = null
+let audioChunks: Blob[] = []
+let lastTranscriptLength = 0 // Track length of last transcription to show only new text
+let chunkInterval: number | null = null
+const CHUNK_INTERVAL_MS = 3000 // Send audio every 3 seconds
+let mediaStream: MediaStream | null = null
+
 const displayText = computed(() => {
   if (interimTranscript.value) {
     return transcript.value + ' ' + interimTranscript.value
@@ -73,7 +86,7 @@ function initRecognition() {
   const rec = new SpeechRecognition()
   rec.continuous = true
   rec.interimResults = true
-  rec.lang = 'es-ES'
+  rec.lang = 'es-419' // Latin American Spanish (better for accents)
 
   rec.onresult = (event: SpeechRecognitionEvent) => {
     let interim = ''
@@ -105,7 +118,7 @@ function initRecognition() {
   }
 
   rec.onend = () => {
-    if (isRecording.value) {
+    if (isRecording.value && !mediaRecorder) {
       // Restart if still recording (browser stops after silence)
       rec.start()
     }
@@ -114,6 +127,230 @@ function initRecognition() {
   return rec
 }
 
+// ============ WHISPER FUNCTIONS ============
+
+/** Read the Whisper status endpoint into useWhisper/whisperReady; resolves to the payload, or null on failure. */
+async function checkWhisperStatus() {
+  try {
+    const res = await fetch(`http://${window.location.hostname}:4100/api/whisper/status`)
+    if (!res.ok) throw new Error(`HTTP ${res.status}`)
+    const data = await res.json()
+    useWhisper.value = data.enabled
+    whisperReady.value = data.running
+    return data
+  } catch {
+    useWhisper.value = false
+    whisperReady.value = false
+    return null
+  }
+}
+
+/** POST to the Whisper toggle endpoint, mirror the reply into local state and open or close the Whisper socket. */
+async function toggleWhisperMode() {
+  whisperLoading.value = true
+  error.value = ''
+
+  try {
+    const res = await fetch(`http://${window.location.hostname}:4100/api/whisper/toggle`, {
+      method: 'POST'
+    })
+    if (!res.ok) throw new Error(`HTTP ${res.status}`)
+    const data = await res.json()
+
+    useWhisper.value = data.enabled
+    whisperReady.value = data.running
+
+    if (data.enabled) {
+      canvasStore.showNotification('Whisper GPU enabled', 'success')
+      connectWhisperSocket()
+    } else {
+      canvasStore.showNotification('Using Web Speech API', 'info')
+      disconnectWhisperSocket()
+    }
+  } catch (e: any) {
+    error.value = 'Failed to toggle Whisper'
+    console.error('[Voice] Whisper toggle error:', e)
+  } finally {
+    whisperLoading.value = false
+  }
+}
+
+/** Open the Whisper WebSocket (unless one is already open or connecting) and wire its event handlers. */
+function connectWhisperSocket() {
+  // Reuse a socket that is open or still connecting so repeated calls do not stack connections
+  const state = whisperSocket?.readyState
+  if (state === WebSocket.OPEN || state === WebSocket.CONNECTING) return
+
+  console.log('[Voice] Connecting to Whisper server...')
+  const ws = new WebSocket(WHISPER_WS_URL)
+  whisperSocket = ws
+
+  ws.onopen = () => {
+    console.log('[Voice] Whisper WebSocket connected')
+    whisperReady.value = true
+  }
+
+  ws.onmessage = (event) => {
+    try {
+      const msg = JSON.parse(event.data)
+
+      if (msg.type === 'ready') {
+        console.log('[Voice] Whisper ready:', msg.model, msg.device)
+        whisperReady.value = true
+      } else if (msg.type === 'transcription') {
+        if (msg.success && msg.text) {
+          const fullText = msg.text.trim()
+
+          if (msg.partial) {
+            // For partial results, show as interim (will be replaced)
+            // Only show text that's new since last transcription
+            const newText = fullText.substring(lastTranscriptLength).trim()
+            if (newText) {
+              interimTranscript.value = newText
+              console.log(`[Voice] 🔄 WHISPER partial:`, newText)
+            }
+            // Remember how much text has been seen for the next partial
+            lastTranscriptLength = fullText.length
+          } else {
+            // Final result - replace everything and restart partial tracking
+            transcript.value = fullText + ' '
+            interimTranscript.value = ''
+            lastTranscriptLength = 0
+            console.log(`[Voice] 🎯 WHISPER-GPU (${msg.model}/${msg.device}):`, fullText)
+          }
+        } else if (msg.error) {
+          error.value = msg.error
+          console.error('[Voice] Whisper error:', msg.error)
+        }
+      }
+    } catch (e) {
+      console.error('[Voice] Whisper message error:', e)
+    }
+  }
+
+  ws.onclose = () => {
+    console.log('[Voice] Whisper WebSocket closed')
+    // A stale socket closing must not clear the ready state of its replacement
+    if (whisperSocket === ws) whisperReady.value = false
+  }
+
+  ws.onerror = (e) => {
+    console.error('[Voice] Whisper WebSocket error:', e)
+    if (whisperSocket === ws) whisperReady.value = false
+  }
+}
+
+/** Close the Whisper WebSocket, if any, and mark Whisper as not ready. */
+function disconnectWhisperSocket() {
+  if (whisperSocket) {
+    whisperSocket.close()
+    whisperSocket = null
+  }
+  whisperReady.value = false
+}
+
+/** Record the microphone with MediaRecorder and send the accumulated audio for partial transcription every CHUNK_INTERVAL_MS. */
+async function startWhisperRecording() {
+  try {
+    mediaStream = await navigator.mediaDevices.getUserMedia({ audio: true })
+
+    mediaRecorder = new MediaRecorder(mediaStream, {
+      mimeType: 'audio/webm;codecs=opus'
+    })
+
+    mediaRecorder.ondataavailable = (event) => {
+      if (event.data.size > 0) {
+        audioChunks.push(event.data)
+      }
+    }
+
+    // Reset state for new recording
+    audioChunks = []
+    lastTranscriptLength = 0
+
+    // Start recording
+    mediaRecorder.start(100) // Collect data every 100ms
+    isRecording.value = true
+    interimTranscript.value = 'Listening (Whisper GPU)...'
+
+    // Send chunks periodically for progressive transcription
+    chunkInterval = window.setInterval(() => {
+      if (audioChunks.length > 0 && whisperSocket?.readyState === WebSocket.OPEN) {
+        sendAudioChunk(false) // false = partial, don't clear
+      }
+    }, CHUNK_INTERVAL_MS)
+  } catch (e: any) {
+    // Release the microphone if setup failed after the stream was acquired
+    if (mediaStream) {
+      mediaStream.getTracks().forEach(track => track.stop())
+      mediaStream = null
+    }
+    mediaRecorder = null
+    error.value = `Microphone error: ${e.message}`
+    console.error('[Voice] Microphone error:', e)
+  }
+}
+
+/** Send all accumulated audio, base64-encoded, over the Whisper socket; a final send also empties the buffer. */
+function sendAudioChunk(isFinal: boolean) {
+  if (audioChunks.length === 0) return
+
+  // Always send ALL accumulated audio (webm needs header from first chunk)
+  const chunkCount = audioChunks.length // captured before the final-send reset below
+  const audioBlob = new Blob(audioChunks, { type: 'audio/webm' })
+
+  // Clear chunks only if final
+  if (isFinal) {
+    audioChunks = []
+    lastTranscriptLength = 0
+  }
+
+  const reader = new FileReader()
+  reader.onloadend = () => {
+    const base64 = (reader.result as string).split(',')[1]
+
+    if (whisperSocket?.readyState === WebSocket.OPEN) {
+      if (!isFinal) {
+        interimTranscript.value = 'Processing...'
+      }
+      whisperSocket.send(JSON.stringify({
+        type: 'transcribe',
+        audio: base64,
+        language: 'es',
+        partial: !isFinal
+      }))
+      console.log(`[Voice] Sent ${isFinal ? 'FINAL' : 'partial'} audio (${chunkCount} chunks, ${audioBlob.size} bytes)`)
+    }
+  }
+  reader.readAsDataURL(audioBlob)
+}
+
+/** Stop the chunk interval, the recorder and the microphone; the final chunk is sent once the recorder has stopped. */
+function stopWhisperRecording() {
+  // Clear the chunk interval
+  if (chunkInterval) {
+    clearInterval(chunkInterval)
+    chunkInterval = null
+  }
+
+  // Stop recorder; the final chunk is sent from onstop so the last
+  // dataavailable event is included and no stale audio stays queued
+  const recorder = mediaRecorder
+  mediaRecorder = null
+  if (recorder && recorder.state !== 'inactive') {
+    recorder.onstop = () => sendAudioChunk(true) // true = final
+    recorder.stop()
+  }
+
+  // Stop media stream
+  if (mediaStream) {
+    mediaStream.getTracks().forEach(track => track.stop())
+    mediaStream = null
+  }
+
+  isRecording.value = false
+}
+
 function toggleRecording() {
   if (isRecording.value) {
     stopRecording()
@@ -124,24 +361,37 @@ function toggleRecording() {
 
 function startRecording() {
   error.value = ''
-  if (!recognition) {
-    recognition = initRecognition()
-  }
-  if (recognition) {
-    try {
-      recognition.start()
-      isRecording.value = true
-    } catch (e) {
-      console.error('[Voice] Failed to start:', e)
+
+  if (useWhisper.value && whisperReady.value) {
+    // Use Whisper GPU mode
+    startWhisperRecording()
+  } else {
+    // Use Web Speech API
+    if (!recognition) {
+      recognition = initRecognition()
+    }
+    if (recognition) {
+      try {
+        recognition.start()
+        isRecording.value = true
+      } catch (e) {
+        console.error('[Voice] Failed to start:', e)
+      }
     }
   }
 }
 
 function stopRecording() {
-  if (recognition) {
-    recognition.stop()
+  // Stop whichever engine is actually running; startRecording() falls back to
+  // Web Speech when Whisper is enabled but not ready
+  if (mediaRecorder) {
+    stopWhisperRecording()
+  } else {
+    if (recognition) {
+      recognition.stop()
+    }
+    isRecording.value = false
   }
-  isRecording.value = false
   interimTranscript.value = ''
 }
 
@@ -209,6 +459,7 @@ function sendTranscript() {
 
 function close() {
   stopRecording()
+  clearTranscript()
   isOpen.value = false
 }
 
@@ -349,17 +600,28 @@ function sendTranscriptAndClose() {
   typeChar()
 }
 
-onMounted(() => {
+onMounted(async () => {
   recognition = initRecognition()
   // Use capture phase to intercept before terminal or other elements
   document.addEventListener('keydown', handleKeyDown, { capture: true })
   document.addEventListener('keyup', handleKeyUp, { capture: true })
+
+  // Check Whisper status on mount
+  await checkWhisperStatus()
+  if (useWhisper.value) {
+    connectWhisperSocket()
+  }
 })
 
 onBeforeUnmount(() => {
   stopRecording()
   recognition = null
   disconnectSocket()
+  disconnectWhisperSocket()
+  if (chunkInterval) clearInterval(chunkInterval)
+  if (mediaStream) {
+    mediaStream.getTracks().forEach(track => track.stop())
+  }
   document.removeEventListener('keydown', handleKeyDown, { capture: true })
   document.removeEventListener('keyup', handleKeyUp, { capture: true })
   document.removeEventListener('mousemove', onDrag)
@@ -408,8 +670,23 @@ defineExpose({
 Voice
+
+{{ useWhisper ? 'GPU' : 'Web' }}
+
+