From 59cc8ee87e3e5966ae4db0cdb141502769bbea76 Mon Sep 17 00:00:00 2001 From: josedario87 Date: Sun, 15 Feb 2026 23:33:29 -0600 Subject: [PATCH] feat: Migrate voice capture to composable with floating push-to-talk Extract voice recording logic from FloatingVoice.vue into useVoiceCapture composable. TranscriptCard now does real recording instead of mock typing. InputSettings allows voice mode toggle (WebSpeech/Whisper GPU), mic selection, and debug audio playback. ChatInput gets a settings gear button. Long-press on FloatBubble shows a floating TranscriptCard (push-to-talk) instead of opening the full PromptBar. Release stops recording after a 500ms buffer. Click still opens PromptBar normally. Parallel MediaRecorder captures raw audio in WebSpeech mode for DB save and debug playback. Transient errors (no-speech) no longer kill sessions. Touch selection prevention on FloatBubble for tablets. --- frontend/src/components/AgentBar.vue | 201 ++++- frontend/src/components/agent/FloatBubble.vue | 24 +- .../src/components/agent/InputSettings.vue | 41 + frontend/src/components/agent/PromptBar.vue | 736 +++++++++++++++++- .../src/components/agent/TranscriptCard.vue | 6 +- 5 files changed, 971 insertions(+), 37 deletions(-) diff --git a/frontend/src/components/AgentBar.vue b/frontend/src/components/AgentBar.vue index 9570af0..41de78a 100644 --- a/frontend/src/components/AgentBar.vue +++ b/frontend/src/components/AgentBar.vue @@ -2,8 +2,11 @@ import { ref, reactive, onMounted, onBeforeUnmount, computed } from 'vue' import { endpoints } from '../config/endpoints' import type { Agent, AgentStatusState, ClaudeStatus } from '../types/agent' +import { useVoiceCapture } from '../composables/useVoiceCapture' +import { useCanvasStore } from '../stores/canvas' import FloatBubble from './agent/FloatBubble.vue' import PromptBar from './agent/PromptBar.vue' +import TranscriptCard from './agent/TranscriptCard.vue' const agents = ref([]) const loading = ref(true) @@ -22,6 +25,39 @@ const isRecordingActive = computed(() => promptBarRef.value?.isRecording ?? false ) +// Floating transcript state (long-press recording) +const canvasStore = useCanvasStore() +const floatingVoice = useVoiceCapture({ + onNotification: (msg, type, dur) => canvasStore.showNotification(msg, type, dur) +}) +const floatingAgentId = ref(null) +const floatingAnchorRect = ref(null) + +const floatingAgent = computed(() => + enabledAgents.value.find(a => a.id === floatingAgentId.value) || null +) + +const isFloatingRecording = computed(() => + !!floatingAgentId.value && floatingVoice.isRecording.value +) + +const floatingStyle = computed(() => { + if (!floatingAnchorRect.value) return {} + const rect = floatingAnchorRect.value + const bubbleCenterX = rect.left + rect.width / 2 + const bottomOffset = window.innerHeight - rect.top + 12 + const panelWidth = 320 + let left = bubbleCenterX - panelWidth / 2 + left = Math.max(12, Math.min(left, window.innerWidth - panelWidth - 12)) + return { + position: 'fixed' as const, + bottom: `${bottomOffset}px`, + left: `${left}px`, + width: `${panelWidth}px`, + zIndex: 10000 + } +}) + const enabledAgents = computed(() => agents.value.filter(a => a.uiConfig?.enabled) ) @@ -211,8 +247,56 @@ function handleBubbleClick(agent: Agent, event: MouseEvent) { openPromptBar(agent, event.currentTarget as HTMLElement, false) } -function handleBubbleHold(agent: Agent, el: HTMLElement) { - openPromptBar(agent, el, true) +async function handleBubbleHold(agent: Agent, el: HTMLElement) { + // Close PromptBar if open + if (activeAgentId.value) { + handlePromptClose() + } + // Open floating transcript + floatingAnchorRect.value = el.getBoundingClientRect() + floatingAgentId.value = agent.id + await floatingVoice.init() + floatingVoice.clearTranscript() + floatingVoice.startRecording() +} + +function handleBubbleHoldRelease() { + if (!floatingAgentId.value || !floatingVoice.isRecording.value) return + // Buffer 500ms for trailing words, then stop and emit done via TranscriptCard + setTimeout(() => { + if (floatingVoice.isRecording.value) { + floatingVoice.stopRecording() + // Wait for final Whisper result if needed + const delay = floatingVoice.voiceMode.value === 'whisper' ? 800 : 200 + setTimeout(() => { + const text = floatingVoice.transcript.value.trim() + closeFloating() + if (text) { + console.log(`[AgentBar] Voice submit to ${floatingAgent.value?.id}:`, text) + } + }, delay) + } + }, 500) +} + +function handleFloatingDone(text: string) { + closeFloating() + if (text.trim()) { + console.log(`[AgentBar] Voice submit to ${floatingAgent.value?.id}:`, text) + } +} + +function closeFloating() { + if (floatingVoice.isRecording.value) { + floatingVoice.stopRecording() + } + floatingVoice.cleanup() + floatingAgentId.value = null + floatingAnchorRect.value = null +} + +function handleFloatingClose() { + closeFloating() } function handlePromptClose() { @@ -234,6 +318,7 @@ onMounted(() => { onBeforeUnmount(() => { statusWs?.close() + floatingVoice.cleanup() if (reconnectTimeout) clearTimeout(reconnectTimeout) for (const [, timers] of agentTimers) { for (const key of Object.keys(timers)) { @@ -251,9 +336,10 @@ onBeforeUnmount(() => { :key="agent.id" :agent="agent" :status="agentStatuses[agent.id]" - :recording="activeAgentId === agent.id && isRecordingActive" + :recording="(activeAgentId === agent.id && isRecordingActive) || (floatingAgentId === agent.id && isFloatingRecording)" @click="handleBubbleClick(agent, $event)" @hold="handleBubbleHold(agent, $event)" + @holdrelease="handleBubbleHoldRelease" /> @@ -267,6 +353,27 @@ onBeforeUnmount(() => { @close="handlePromptClose" @submit="handlePromptSubmit" /> + + + + +
+
+
+
+ {{ floatingAgent.uiConfig?.shortLabel }} +
+ {{ floatingAgent?.uiConfig?.label || floatingAgent?.name }} +
+ +
+
+
+
diff --git a/frontend/src/components/agent/FloatBubble.vue b/frontend/src/components/agent/FloatBubble.vue index 21196d0..ea236dd 100644 --- a/frontend/src/components/agent/FloatBubble.vue +++ b/frontend/src/components/agent/FloatBubble.vue @@ -11,9 +11,10 @@ const props = defineProps<{ const emit = defineEmits<{ click: [event: MouseEvent] hold: [el: HTMLElement] + holdrelease: [] }>() -const HOLD_MS = 400 +const HOLD_MS = 200 let holdTimer: number | null = null let didHold = false let holdTarget: HTMLElement | null = null @@ -30,13 +31,18 @@ function onPointerDown(e: PointerEvent) { function onPointerUp(e: PointerEvent) { clearHold() - if (!didHold) { + if (didHold) { + emit('holdrelease') + } else { emit('click', e as unknown as MouseEvent) } } function onPointerCancel() { clearHold() + if (didHold) { + emit('holdrelease') + } } function clearHold() { @@ -124,7 +130,10 @@ function bubbleTitle() { @pointerdown.prevent="onPointerDown" @pointerup="onPointerUp" @pointercancel="onPointerCancel" + @touchstart.prevent @contextmenu.prevent + @selectstart.prevent + @dragstart.prevent @mouseenter="!isAnimating() && (($event.currentTarget as HTMLElement).style.boxShadow = bubbleHoverShadow())" @mouseleave="!isAnimating() && (($event.currentTarget as HTMLElement).style.boxShadow = bubbleStyle().boxShadow || '')" > @@ -194,7 +203,8 @@ function bubbleTitle() { -webkit-user-select: none; user-select: none; -webkit-touch-callout: none; - touch-action: manipulation; + -webkit-tap-highlight-color: transparent; + touch-action: none; position: relative; } @@ -543,6 +553,14 @@ function bubbleTitle() { 50% { opacity: 0.7; } } +/* Prevent touch selection on all children */ +.agent-bubble * { + -webkit-user-select: none; + user-select: none; + -webkit-touch-callout: none; + pointer-events: none; +} + /* ====================== LABELS ====================== */ .bubble-label { diff --git a/frontend/src/components/agent/InputSettings.vue b/frontend/src/components/agent/InputSettings.vue index ba4894d..d9c550c 100644 --- a/frontend/src/components/agent/InputSettings.vue +++ b/frontend/src/components/agent/InputSettings.vue @@ -1,18 +1,41 @@