feat: Add FloatingVoice component for voice-to-text input

- Add FloatingVoice component with Web Speech API transcription
- Each component has its own independent WebSocket session
- Voice panel connects on open, disconnects on close
- Sends transcribed text to Claude Code with Enter key
This commit is contained in:
2026-02-13 20:24:57 -06:00
parent 86b3246fa1
commit 8118356999
3 changed files with 652 additions and 1 deletions

View File

@@ -8,6 +8,7 @@ import ToolsDropdown from './components/ToolsDropdown.vue'
import ConnectionDropdown from './components/ConnectionDropdown.vue' import ConnectionDropdown from './components/ConnectionDropdown.vue'
import FloatingTerminal from './components/FloatingTerminal.vue' import FloatingTerminal from './components/FloatingTerminal.vue'
import FloatingResponse from './components/FloatingResponse.vue' import FloatingResponse from './components/FloatingResponse.vue'
import FloatingVoice from './components/FloatingVoice.vue'
import PwaInstallBanner from './components/PwaInstallBanner.vue' import PwaInstallBanner from './components/PwaInstallBanner.vue'
import { initWebMCP, getWebMCP, startTokenPolling, stopTokenPolling, connectWithToken } from './services/webmcp' import { initWebMCP, getWebMCP, startTokenPolling, stopTokenPolling, connectWithToken } from './services/webmcp'
import { initToolRegistry, activatePageTools, initToolsOnRefresh } from './services/toolRegistry' import { initToolRegistry, activatePageTools, initToolsOnRefresh } from './services/toolRegistry'
@@ -18,6 +19,7 @@ import { useCanvasStore } from './stores/canvas'
const route = useRoute() const route = useRoute()
const router = useRouter() const router = useRouter()
const showTerminal = ref(false) const showTerminal = ref(false)
const showVoice = ref(false)
const terminalRef = ref<InstanceType<typeof FloatingTerminal> | null>(null) const terminalRef = ref<InstanceType<typeof FloatingTerminal> | null>(null)
const responseRef = ref<InstanceType<typeof FloatingResponse> | null>(null) const responseRef = ref<InstanceType<typeof FloatingResponse> | null>(null)
const canvasStore = useCanvasStore() const canvasStore = useCanvasStore()
@@ -157,11 +159,29 @@ watch(() => route.name, (newPage) => {
</svg> </svg>
</button> </button>
<!-- Voice FAB Button -->
<button
class="voice-fab"
:class="{ active: showVoice }"
@click="showVoice = !showVoice"
title="Voice Input"
>
<svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round">
<path d="M12 1a3 3 0 0 0-3 3v8a3 3 0 0 0 6 0V4a3 3 0 0 0-3-3z"/>
<path d="M19 10v2a7 7 0 0 1-14 0v-2"/>
<line x1="12" y1="19" x2="12" y2="23"/>
<line x1="8" y1="23" x2="16" y2="23"/>
</svg>
</button>
<!-- Floating Terminal --> <!-- Floating Terminal -->
<FloatingTerminal ref="terminalRef" v-model="showTerminal" /> <FloatingTerminal ref="terminalRef" v-model="showTerminal" />
<!-- Floating Response (Agent UI messages) --> <!-- Floating Response (Agent UI messages) -->
<FloatingResponse ref="responseRef" /> <FloatingResponse ref="responseRef" />
<!-- Floating Voice Input -->
<FloatingVoice v-model="showVoice" />
</div> </div>
</template> </template>
@@ -257,6 +277,40 @@ watch(() => route.name, (newPage) => {
box-shadow: 0 12px 32px rgba(239, 68, 68, 0.5); box-shadow: 0 12px 32px rgba(239, 68, 68, 0.5);
} }
/* Voice FAB */
.voice-fab {
position: fixed;
bottom: 20px;
left: 20px;
width: 48px;
height: 48px;
border-radius: 50%;
background: linear-gradient(135deg, #10b981 0%, #059669 100%);
color: white;
border: none;
cursor: pointer;
display: flex;
align-items: center;
justify-content: center;
box-shadow: 0 6px 20px rgba(16, 185, 129, 0.4);
transition: all 0.3s cubic-bezier(0.4, 0, 0.2, 1);
z-index: 9998;
}
.voice-fab:hover {
transform: scale(1.08);
box-shadow: 0 8px 24px rgba(16, 185, 129, 0.5);
}
.voice-fab.active {
background: linear-gradient(135deg, #ef4444 0%, #dc2626 100%);
box-shadow: 0 6px 20px rgba(239, 68, 68, 0.4);
}
.voice-fab.active:hover {
box-shadow: 0 8px 24px rgba(239, 68, 68, 0.5);
}
@media (max-width: 768px) { @media (max-width: 768px) {
.terminal-fab { .terminal-fab {
bottom: 16px; bottom: 16px;
@@ -269,5 +323,12 @@ watch(() => route.name, (newPage) => {
opacity: 0; opacity: 0;
pointer-events: none; pointer-events: none;
} }
.voice-fab {
bottom: 16px;
left: 16px;
width: 44px;
height: 44px;
}
} }
</style> </style>

View File

@@ -436,7 +436,14 @@ defineExpose({
isOpen: isOpen.value, isOpen: isOpen.value,
position: position.value, position: position.value,
size: size.value size: size.value
}) }),
sendInput: (text: string) => {
if (socket && socket.readyState === WebSocket.OPEN) {
socket.send(JSON.stringify({ type: 'input', data: text + '\r' }))
return true
}
return false
}
}) })
</script> </script>

View File

@@ -0,0 +1,583 @@
<script setup lang="ts">
import { ref, computed, onMounted, onBeforeUnmount, watch } from 'vue'
import { useCanvasStore } from '../stores/canvas'
// v-model contract: the parent owns the open/closed flag and passes it in as modelValue.
const props = defineProps<{
modelValue: boolean
}>()
// Event used to ask the parent to change the open/closed flag.
const emit = defineEmits<{
'update:modelValue': [value: boolean]
}>()
// Store handle; this component uses it for showNotification().
const canvasStore = useCanvasStore()
// Writable proxy over the v-model: reads come from props, writes are forwarded
// to the parent as update:modelValue events (the component never mutates props).
const isOpen = computed({
get: () => props.modelValue,
set: (val) => emit('update:modelValue', val)
})
// Recording state
// True while the user wants recognition running; the recognizer's onend handler
// reads it to decide whether to restart.
const isRecording = ref(false)
// Finalized text accumulated across recognition results.
const transcript = ref('')
// Non-final text for the phrase currently being spoken; overwritten on every result event.
const interimTranscript = ref('')
// Last user-facing error message; empty string when there is none.
const error = ref('')
// Position and drag state
// Top-left corner of the window in viewport pixels; only applied once hasCustomPosition is true.
const position = ref({ x: 0, y: 0 })
// False until the first drag; while false the window uses its default bottom-left anchor.
const hasCustomPosition = ref(false)
const isDragging = ref(false)
// Pointer offset from the window's top-left corner, captured when a drag starts.
const dragOffset = ref({ x: 0, y: 0 })
const containerRef = ref<HTMLElement | null>(null)
// Speech recognition
// Created by initRecognition(); null when not yet created or unsupported.
let recognition: SpeechRecognition | null = null
// WebSocket connection (own session)
// NOTE(review): the scheme is always ws:// — browsers may block this as mixed content
// when the page is served over https; confirm against the deployment setup.
const WS_URL = `ws://${window.location.hostname}:4103`
let socket: WebSocket | null = null
// Mirrors the socket's open state for reactive consumers.
const connected = ref(false)
/**
 * Single-string preview of the dictation: the finalized transcript followed by
 * the in-progress phrase, or the placeholder prompt when nothing has been said.
 */
const displayText = computed(() => {
  const finalText = transcript.value
  const pending = interimTranscript.value
  if (!pending) {
    return finalText || 'Presiona el micrófono para comenzar...'
  }
  return `${finalText} ${pending}`
})
/**
 * Inline style for the floating window. Once the user has dragged it, the
 * window is pinned by top/left; before that it sits at its default
 * bottom-left anchor.
 */
const containerStyle = computed(() => {
  if (hasCustomPosition.value) {
    const { x, y } = position.value
    return {
      top: `${y}px`,
      left: `${x}px`,
      bottom: 'auto',
      right: 'auto'
    }
  }
  return { bottom: '80px', left: '16px' }
})
/**
 * Create and configure a speech recognizer for continuous dictation (lang 'es-ES').
 *
 * Uses the standard `SpeechRecognition` constructor when present and falls back
 * to the `webkit`-prefixed one. Final results are appended to `transcript`; the
 * in-progress phrase is mirrored into `interimTranscript`.
 *
 * @returns The configured recognizer, or `null` (with `error` set) when the
 *          browser exposes neither constructor.
 */
function initRecognition() {
  const SpeechRecognition = (window as any).SpeechRecognition || (window as any).webkitSpeechRecognition
  if (!SpeechRecognition) {
    error.value = 'Speech recognition not supported in this browser'
    return null
  }
  const rec = new SpeechRecognition()
  rec.continuous = true
  rec.interimResults = true
  rec.lang = 'es-ES'
  rec.onresult = (event: SpeechRecognitionEvent) => {
    let interim = ''
    let final = ''
    // Only results from resultIndex onward changed in this event.
    for (let i = event.resultIndex; i < event.results.length; i++) {
      const result = event.results[i]
      if (result.isFinal) {
        final += result[0].transcript + ' '
      } else {
        interim += result[0].transcript
      }
    }
    if (final) {
      transcript.value += final
    }
    interimTranscript.value = interim
  }
  rec.onerror = (event: SpeechRecognitionErrorEvent) => {
    // 'no-speech' only means nothing was heard for a while. The session then
    // ends and onend restarts it, so it must not cancel the recording or be
    // shown to the user as a failure.
    if (event.error === 'no-speech') return
    console.error('[Voice] Recognition error:', event.error)
    if (event.error === 'not-allowed') {
      error.value = 'Microphone access denied'
    } else {
      error.value = `Error: ${event.error}`
    }
    isRecording.value = false
  }
  rec.onend = () => {
    if (!isRecording.value) return
    // Restart if still recording (browser stops after silence).
    // start() can throw (e.g. InvalidStateError); report it instead of letting
    // the exception escape the event handler with isRecording stuck at true.
    try {
      rec.start()
    } catch (e: unknown) {
      console.error('[Voice] Failed to restart recognition:', e)
      error.value = 'Voice recognition stopped unexpectedly'
      isRecording.value = false
    }
  }
  return rec
}
/** Flip the microphone: stop when currently recording, start otherwise. */
function toggleRecording() {
  const action = isRecording.value ? stopRecording : startRecording
  action()
}
/**
 * Begin listening. Creates the recognizer on first use.
 *
 * Does nothing when already recording: calling `start()` on a running
 * recognizer throws. If `start()` still fails (for example because the previous
 * session has not finished stopping), the failure is logged and surfaced
 * through `error` so the user knows to retry.
 */
function startRecording() {
  if (isRecording.value) return
  error.value = ''
  if (!recognition) {
    recognition = initRecognition()
  }
  // initRecognition() has already set `error` when the browser is unsupported.
  if (!recognition) return
  try {
    recognition.start()
    isRecording.value = true
  } catch (e: unknown) {
    console.error('[Voice] Failed to start:', e)
    error.value = 'Could not start voice recognition. Please try again.'
  }
}
/**
 * Stop listening and discard the unfinished phrase. Safe to call when no
 * recognizer exists. The finalized transcript is kept.
 */
function stopRecording() {
  recognition?.stop()
  isRecording.value = false
  interimTranscript.value = ''
}
/** Wipe both the finalized and the in-progress text. */
function clearTranscript() {
  for (const buffer of [transcript, interimTranscript]) {
    buffer.value = ''
  }
}
/**
 * Open this component's own WebSocket session to the terminal backend.
 *
 * Does nothing when a socket is already open or still connecting, so repeated
 * calls never create duplicate connections. Each handler checks that its socket
 * is still the current one, so a late event from a replaced socket cannot
 * clobber the state of its successor.
 */
function connectSocket() {
  if (
    socket &&
    (socket.readyState === WebSocket.OPEN || socket.readyState === WebSocket.CONNECTING)
  ) {
    return
  }
  const ws = new WebSocket(WS_URL)
  socket = ws
  ws.onopen = () => {
    if (socket !== ws) return
    console.log('[Voice] WebSocket connected')
    connected.value = true
  }
  ws.onclose = () => {
    console.log('[Voice] WebSocket closed')
    // Close events arrive asynchronously; ignore one from a socket that has
    // already been replaced or detached.
    if (socket !== ws) return
    connected.value = false
    socket = null
  }
  ws.onerror = (e) => {
    console.error('[Voice] WebSocket error:', e)
    if (socket === ws) {
      connected.value = false
    }
  }
}
/** Close and forget the current socket, if there is one. */
function disconnectSocket() {
  if (!socket) return
  socket.close()
  socket = null
  connected.value = false
}
// True while a transcript is being typed into the terminal; blocks overlapping sends.
let isSending = false

/**
 * Type the finalized transcript into the terminal session one character at a
 * time (15 ms apart), followed by Enter ('\r').
 *
 * - Ignored while a previous send is still in progress, so a double click
 *   cannot interleave two messages.
 * - Reports an error notification when the socket is not open at the start or
 *   drops part-way through; the transcript is kept in that case so the user
 *   can retry.
 * - On success, notifies, clears the transcript and closes the panel.
 */
function sendTranscript() {
  if (isSending) return
  const text = transcript.value.trim()
  if (!text) return
  if (!socket || socket.readyState !== WebSocket.OPEN) {
    canvasStore.showNotification('Not connected to terminal', 'error')
    return
  }
  // Array.from iterates by code point, so characters outside the BMP are sent
  // whole instead of as two lone surrogates.
  const chars = Array.from(text + '\r')
  let i = 0
  isSending = true
  const typeChar = () => {
    if (i >= chars.length) {
      isSending = false
      canvasStore.showNotification('Voice message sent', 'success')
      clearTranscript()
      close()
      return
    }
    if (!socket || socket.readyState !== WebSocket.OPEN) {
      // The connection went away mid-message (e.g. the panel was closed).
      isSending = false
      canvasStore.showNotification('Connection lost while sending voice message', 'error')
      return
    }
    socket.send(JSON.stringify({ type: 'input', data: chars[i] }))
    i++
    setTimeout(typeChar, 15)
  }
  typeChar()
}
/**
 * Close the panel: stop the microphone first, then ask the parent to flip the
 * v-model to false.
 */
function close() {
  stopRecording()
  emit('update:modelValue', false)
}
// Drag handlers
/**
 * Begin dragging the window from its titlebar.
 *
 * Presses that land inside `.window-controls` (the close button area) are
 * ignored. Records where inside the window the pointer grabbed it, then tracks
 * the mouse on `document` so the drag continues after the pointer leaves the
 * titlebar.
 */
function startDrag(e: MouseEvent) {
  const pressed = e.target as HTMLElement
  if (pressed.closest('.window-controls')) return

  isDragging.value = true

  const box = containerRef.value?.getBoundingClientRect()
  if (box) {
    dragOffset.value = { x: e.clientX - box.left, y: e.clientY - box.top }
  }

  document.addEventListener('mousemove', onDrag)
  document.addEventListener('mouseup', stopDrag)
}
/**
 * Move the window with the pointer while a drag is active.
 *
 * The position is clamped so that at least a quarter of the window stays inside
 * the viewport on every side. Falls back to 300x200 when the element size is
 * unavailable.
 */
function onDrag(e: MouseEvent) {
  if (!isDragging.value) return

  const el = containerRef.value
  const width = el?.offsetWidth || 300
  const height = el?.offsetHeight || 200
  const clamp = (value: number, lo: number, hi: number) => Math.max(lo, Math.min(value, hi))

  position.value = {
    x: clamp(e.clientX - dragOffset.value.x, -width * 0.75, window.innerWidth - width * 0.25),
    y: clamp(e.clientY - dragOffset.value.y, -height * 0.75, window.innerHeight - height * 0.25)
  }
  hasCustomPosition.value = true
}
/** End the drag and detach the document-level mouse listeners. */
function stopDrag() {
  document.removeEventListener('mousemove', onDrag)
  document.removeEventListener('mouseup', stopDrag)
  isDragging.value = false
}
// Build the recognizer as soon as the component mounts. initRecognition() also
// sets `error` when the browser has no speech recognition support.
onMounted(() => {
recognition = initRecognition()
})
// Teardown: stop the microphone, drop the recognizer, close the socket, and
// remove any drag listeners that may still be attached to document.
onBeforeUnmount(() => {
stopRecording()
recognition = null
disconnectSocket()
document.removeEventListener('mousemove', onDrag)
document.removeEventListener('mouseup', stopDrag)
})
// Keep the socket and the microphone in step with panel visibility.
// The parent can close the panel directly through v-model (e.g. the FAB toggle)
// without going through close(), so recording has to be stopped here as well;
// otherwise the microphone stays live behind a hidden panel.
// `immediate` covers a panel that is mounted already open.
watch(isOpen, (open) => {
  if (open) {
    connectSocket()
    return
  }
  stopRecording()
  disconnectSocket()
}, { immediate: true })
// Expose methods
// Imperative API available to a parent through a template ref.
defineExpose({
  /** Ask the parent to open the panel. */
  open() {
    isOpen.value = true
  },
  close,
  startRecording,
  stopRecording,
  /** Current finalized transcript (interim text excluded). */
  getTranscript() {
    return transcript.value
  }
})
</script>
<template>
<Teleport to="body">
<Transition name="voice-slide">
<div
v-if="isOpen"
ref="containerRef"
class="voice-window"
:class="{ dragging: isDragging }"
:style="containerStyle"
>
<div class="glass">
<!-- Titlebar -->
<div class="titlebar" @mousedown="startDrag">
<div class="left">
<svg width="12" height="12" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2">
<path d="M12 1a3 3 0 0 0-3 3v8a3 3 0 0 0 6 0V4a3 3 0 0 0-3-3z"/>
<path d="M19 10v2a7 7 0 0 1-14 0v-2"/>
<line x1="12" y1="19" x2="12" y2="23"/>
<line x1="8" y1="23" x2="16" y2="23"/>
</svg>
<span>Voice</span>
<i class="dot" :class="{ recording: isRecording }"></i>
</div>
<div class="window-controls">
<button class="x" @click="close" title="Close">
<svg width="8" height="8" viewBox="0 0 10 10">
<line x1="0" y1="0" x2="10" y2="10" stroke="currentColor" stroke-width="1.5"/>
<line x1="10" y1="0" x2="0" y2="10" stroke="currentColor" stroke-width="1.5"/>
</svg>
</button>
</div>
</div>
<!-- Content -->
<div class="content">
<div class="transcript" :class="{ empty: !transcript && !interimTranscript }">
<span class="final">{{ transcript }}</span>
<span class="interim">{{ interimTranscript }}</span>
<span v-if="!transcript && !interimTranscript" class="placeholder">
Presiona el micrófono para comenzar...
</span>
</div>
<div v-if="error" class="error">{{ error }}</div>
</div>
<!-- Controls -->
<div class="controls">
<button
class="rec-btn"
:class="{ active: isRecording }"
@click="toggleRecording"
:title="isRecording ? 'Stop' : 'Record'"
>
<svg v-if="!isRecording" width="16" height="16" viewBox="0 0 24 24" fill="currentColor">
<path d="M12 1a3 3 0 0 0-3 3v8a3 3 0 0 0 6 0V4a3 3 0 0 0-3-3z"/>
<path d="M19 10v2a7 7 0 0 1-14 0v-2" fill="none" stroke="currentColor" stroke-width="2"/>
<line x1="12" y1="19" x2="12" y2="23" stroke="currentColor" stroke-width="2"/>
<line x1="8" y1="23" x2="16" y2="23" stroke="currentColor" stroke-width="2"/>
</svg>
<svg v-else width="16" height="16" viewBox="0 0 24 24" fill="currentColor">
<rect x="6" y="6" width="12" height="12" rx="2"/>
</svg>
</button>
<button class="action-btn" @click="clearTranscript" title="Clear" :disabled="!transcript">
<svg width="14" height="14" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2">
<polyline points="3 6 5 6 21 6"/>
<path d="M19 6v14a2 2 0 0 1-2 2H7a2 2 0 0 1-2-2V6m3 0V4a2 2 0 0 1 2-2h4a2 2 0 0 1 2 2v2"/>
</svg>
</button>
<button
class="action-btn send"
@click="sendTranscript"
title="Send to Claude"
:disabled="!transcript.trim()"
>
<svg width="14" height="14" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2">
<line x1="22" y1="2" x2="11" y2="13"/>
<polygon points="22 2 15 22 11 13 2 9 22 2"/>
</svg>
<span>Send</span>
</button>
</div>
</div>
</div>
</Transition>
</Teleport>
</template>
<style scoped>
.voice-window {
position: fixed;
width: 320px;
z-index: 9998;
}
.voice-window.dragging {
user-select: none;
}
.glass {
display: flex;
flex-direction: column;
background: rgba(200, 215, 235, 0.35);
backdrop-filter: blur(24px) saturate(1.6);
-webkit-backdrop-filter: blur(24px) saturate(1.6);
border-radius: 8px;
border: 1px solid rgba(255, 255, 255, 0.6);
box-shadow:
0 0 0 1px rgba(80, 120, 180, 0.25),
0 6px 24px rgba(0, 0, 0, 0.25),
inset 0 1px 0 rgba(255, 255, 255, 0.6);
overflow: hidden;
}
.titlebar {
height: 26px;
padding: 0 6px;
background: rgba(255, 255, 255, 0.25);
border-bottom: 1px solid rgba(255, 255, 255, 0.3);
display: flex;
align-items: center;
justify-content: space-between;
cursor: grab;
user-select: none;
}
.titlebar:active {
cursor: grabbing;
}
.left {
display: flex;
align-items: center;
gap: 6px;
color: #222;
font: 500 11px/1 system-ui, sans-serif;
}
.dot {
width: 6px;
height: 6px;
border-radius: 50%;
background: #999;
}
.dot.recording {
background: #e33;
box-shadow: 0 0 6px #e33;
animation: pulse 0.8s infinite;
}
@keyframes pulse {
0%, 100% { opacity: 1; }
50% { opacity: 0.5; }
}
.window-controls {
display: flex;
gap: 2px;
}
.window-controls button {
width: 20px;
height: 18px;
display: flex;
align-items: center;
justify-content: center;
background: rgba(255, 255, 255, 0.3);
border: 1px solid rgba(0, 0, 0, 0.1);
border-radius: 3px;
color: #333;
cursor: pointer;
}
.window-controls button:hover {
background: rgba(255, 255, 255, 0.5);
}
.window-controls button.x:hover {
background: linear-gradient(180deg, #e66 0%, #c33 100%);
border-color: #a22;
color: #fff;
}
.content {
padding: 12px;
min-height: 80px;
max-height: 150px;
overflow-y: auto;
}
.transcript {
font-size: 13px;
line-height: 1.5;
color: #222;
}
.transcript.empty {
color: #666;
font-style: italic;
}
.transcript .interim {
color: #666;
font-style: italic;
}
.transcript .placeholder {
color: #888;
}
.error {
margin-top: 8px;
padding: 6px 10px;
background: rgba(220, 50, 50, 0.15);
border-radius: 4px;
color: #c33;
font-size: 11px;
}
.controls {
display: flex;
align-items: center;
gap: 8px;
padding: 10px 12px;
background: rgba(255, 255, 255, 0.15);
border-top: 1px solid rgba(255, 255, 255, 0.3);
}
.rec-btn {
width: 36px;
height: 36px;
border-radius: 50%;
display: flex;
align-items: center;
justify-content: center;
background: linear-gradient(180deg, #f5f5f5 0%, #ddd 100%);
border: 1px solid rgba(0, 0, 0, 0.2);
color: #333;
cursor: pointer;
transition: all 0.15s;
}
.rec-btn:hover {
background: linear-gradient(180deg, #fff 0%, #eee 100%);
}
.rec-btn.active {
background: linear-gradient(180deg, #e55 0%, #c33 100%);
border-color: #a22;
color: #fff;
animation: pulse 1s infinite;
}
.action-btn {
height: 28px;
padding: 0 10px;
display: flex;
align-items: center;
gap: 4px;
background: rgba(255, 255, 255, 0.4);
border: 1px solid rgba(0, 0, 0, 0.15);
border-radius: 4px;
color: #333;
font-size: 11px;
cursor: pointer;
transition: all 0.15s;
}
.action-btn:hover:not(:disabled) {
background: rgba(255, 255, 255, 0.6);
}
.action-btn:disabled {
opacity: 0.4;
cursor: not-allowed;
}
.action-btn.send {
margin-left: auto;
background: linear-gradient(180deg, #4a9 0%, #3a8 100%);
border-color: #2a7;
color: #fff;
}
.action-btn.send:hover:not(:disabled) {
background: linear-gradient(180deg, #5ba 0%, #4a9 100%);
}
/* Transitions */
.voice-slide-enter-active,
.voice-slide-leave-active {
transition: all 0.2s ease;
}
.voice-slide-enter-from,
.voice-slide-leave-to {
opacity: 0;
transform: translateY(16px) scale(0.95);
}
</style>