feat: Migrate voice capture to composable with floating push-to-talk

Extract voice recording logic from FloatingVoice.vue into useVoiceCapture
composable. TranscriptCard now does real recording instead of mock typing.
InputSettings allows voice mode toggle (WebSpeech/Whisper GPU), mic
selection, and debug audio playback. ChatInput gets a settings gear button.

Long-press on FloatBubble shows a floating TranscriptCard (push-to-talk)
instead of opening the full PromptBar. Release stops recording after a
500ms buffer. Click still opens PromptBar normally.

Parallel MediaRecorder captures raw audio in WebSpeech mode for DB save
and debug playback. Transient errors (no-speech) no longer kill sessions.
FloatBubble now also prevents touch-triggered selection on tablets.
This commit is contained in:
2026-02-15 23:33:29 -06:00
parent f3ac7986ec
commit 59cc8ee87e
5 changed files with 971 additions and 37 deletions

View File

@@ -2,8 +2,11 @@
import { ref, reactive, onMounted, onBeforeUnmount, computed } from 'vue'
import { endpoints } from '../config/endpoints'
import type { Agent, AgentStatusState, ClaudeStatus } from '../types/agent'
import { useVoiceCapture } from '../composables/useVoiceCapture'
import { useCanvasStore } from '../stores/canvas'
import FloatBubble from './agent/FloatBubble.vue'
import PromptBar from './agent/PromptBar.vue'
import TranscriptCard from './agent/TranscriptCard.vue'
const agents = ref<Agent[]>([])
const loading = ref(true)
@@ -22,6 +25,39 @@ const isRecordingActive = computed(() =>
promptBarRef.value?.isRecording ?? false
)
// Long-press (push-to-talk) state: a dedicated voice-capture session plus the
// agent id and bubble geometry the floating transcript panel is tied to.
const canvasStore = useCanvasStore()
const floatingVoice = useVoiceCapture({
  // Forward capture notifications to the canvas store.
  onNotification: (message, kind, duration) =>
    canvasStore.showNotification(message, kind, duration)
})
// Id of the agent whose bubble was held; reset to null by closeFloating().
const floatingAgentId = ref<string | null>(null)
// Bounding rect of the held bubble, captured in handleBubbleHold().
const floatingAnchorRect = ref<DOMRect | null>(null)
// Enabled agent matching floatingAgentId, or null when there is none.
const floatingAgent = computed(() => {
  const match = enabledAgents.value.find(agent => agent.id === floatingAgentId.value)
  return match || null
})
// True only while a floating session exists and its capture is recording.
const isFloatingRecording = computed(() =>
  floatingAgentId.value ? floatingVoice.isRecording.value : false
)
// Inline style for the floating transcript panel: fixed-positioned 12px above
// the held bubble, horizontally centred on it and clamped so a 320px panel
// keeps a 12px margin from both viewport edges. Empty object without an anchor.
const floatingStyle = computed(() => {
  const anchor = floatingAnchorRect.value
  if (!anchor) return {}

  const PANEL_WIDTH = 320
  const EDGE_MARGIN = 12

  const distanceFromBottom = window.innerHeight - anchor.top + 12
  const centredLeft = anchor.left + anchor.width / 2 - PANEL_WIDTH / 2
  const maxLeft = window.innerWidth - PANEL_WIDTH - EDGE_MARGIN
  const clampedLeft = Math.max(EDGE_MARGIN, Math.min(centredLeft, maxLeft))

  return {
    position: 'fixed' as const,
    bottom: `${distanceFromBottom}px`,
    left: `${clampedLeft}px`,
    width: `${PANEL_WIDTH}px`,
    zIndex: 10000
  }
})
// Agents whose uiConfig marks them as enabled.
const enabledAgents = computed(() => {
  return agents.value.filter(({ uiConfig }) => uiConfig?.enabled)
})
@@ -211,8 +247,56 @@ function handleBubbleClick(agent: Agent, event: MouseEvent) {
openPromptBar(agent, event.currentTarget as HTMLElement, false)
}
function handleBubbleHold(agent: Agent, el: HTMLElement) {
openPromptBar(agent, el, true)
/**
 * Long-press handler for an agent bubble: opens the floating transcript panel
 * anchored to the bubble and starts a push-to-talk recording.
 *
 * @param agent - Agent whose bubble is being held.
 * @param el - The bubble element; its bounding rect anchors the panel.
 */
async function handleBubbleHold(agent: Agent, el: HTMLElement) {
  // Close the PromptBar first if one is currently open.
  if (activeAgentId.value) {
    handlePromptClose()
  }

  // Open the floating transcript for this agent.
  floatingAnchorRect.value = el.getBoundingClientRect()
  floatingAgentId.value = agent.id

  await floatingVoice.init()

  // init() is asynchronous: while it was pending the panel may have been
  // closed (floatingAgentId reset to null) or re-targeted to another agent.
  // Starting a recording now would leave a capture running for a session
  // that no longer exists, so bail out instead.
  if (floatingAgentId.value !== agent.id) return

  // NOTE(review): a release that arrives before init() resolves is ignored by
  // handleBubbleHoldRelease (it requires isRecording to be true), so the
  // recording still starts here and is not stopped by that release —
  // confirm whether this is intended.
  floatingVoice.clearTranscript()
  floatingVoice.startRecording()
}
/**
 * Hold-release handler for push-to-talk. Keeps recording for a further 500ms
 * so trailing words are captured, stops the capture, waits briefly for the
 * final transcript (800ms in whisper mode, 200ms otherwise), then closes the
 * floating panel and logs the submitted text.
 *
 * Does nothing when no floating session is active or it is not recording.
 */
function handleBubbleHoldRelease() {
  const sessionAgentId = floatingAgentId.value
  if (!sessionAgentId || !floatingVoice.isRecording.value) return

  // Buffer 500ms for trailing words before stopping.
  setTimeout(() => {
    // The session may have been closed or replaced during the buffer; a stale
    // timer must not stop a recording it did not start.
    if (floatingAgentId.value !== sessionAgentId) return
    if (!floatingVoice.isRecording.value) return

    floatingVoice.stopRecording()

    // Wait for the final result; whisper mode is given longer.
    const delay = floatingVoice.voiceMode.value === 'whisper' ? 800 : 200
    setTimeout(() => {
      // Already closed (or re-targeted) elsewhere: nothing left to submit.
      if (floatingAgentId.value !== sessionAgentId) return

      const text = floatingVoice.transcript.value.trim()
      // Read the target before closeFloating() resets floatingAgentId,
      // otherwise the log always reports "undefined".
      const targetId = floatingAgent.value?.id
      closeFloating()
      if (text) {
        console.log(`[AgentBar] Voice submit to ${targetId}:`, text)
      }
    }, delay)
  }, 500)
}
/**
 * Handles the TranscriptCard `done` event: closes the floating panel and logs
 * the submitted text when it is not blank.
 *
 * @param text - Transcript text emitted by the card.
 */
function handleFloatingDone(text: string) {
  // Read the target before closeFloating() resets floatingAgentId, otherwise
  // the log always reports "undefined".
  const targetId = floatingAgent.value?.id
  closeFloating()
  if (text.trim()) {
    console.log(`[AgentBar] Voice submit to ${targetId}:`, text)
  }
}
/**
 * Tears down the floating push-to-talk session: stops any recording still in
 * progress, cleans up the voice capture, and clears the agent id and anchor
 * rect so the panel is no longer rendered.
 */
function closeFloating(): void {
  const stillRecording = floatingVoice.isRecording.value
  if (stillRecording) floatingVoice.stopRecording()

  floatingVoice.cleanup()

  floatingAgentId.value = null
  floatingAnchorRect.value = null
}
/** Backdrop-click handler for the floating transcript: closes the session. */
function handleFloatingClose(): void {
  closeFloating()
}
function handlePromptClose() {
@@ -234,6 +318,7 @@ onMounted(() => {
onBeforeUnmount(() => {
statusWs?.close()
floatingVoice.cleanup()
if (reconnectTimeout) clearTimeout(reconnectTimeout)
for (const [, timers] of agentTimers) {
for (const key of Object.keys(timers)) {
@@ -251,9 +336,10 @@ onBeforeUnmount(() => {
:key="agent.id"
:agent="agent"
:status="agentStatuses[agent.id]"
:recording="activeAgentId === agent.id && isRecordingActive"
:recording="(activeAgentId === agent.id && isRecordingActive) || (floatingAgentId === agent.id && isFloatingRecording)"
@click="handleBubbleClick(agent, $event)"
@hold="handleBubbleHold(agent, $event)"
@holdrelease="handleBubbleHoldRelease"
/>
</div>
@@ -267,6 +353,27 @@ onBeforeUnmount(() => {
@close="handlePromptClose"
@submit="handlePromptSubmit"
/>
<!--
Floating transcript (long-press recording).
Rendered into <body> via Teleport and shown only while both floatingAgentId
and floatingAnchorRect are set. A click directly on the backdrop (not on the
panel, because of the .self modifier) closes it. The panel is positioned by
floatingStyle and shows the agent badge/label above the TranscriptCard, which
shares the floatingVoice capture instance.
-->
<Teleport to="body">
<Transition name="float-transcript">
<div v-if="floatingAgentId && floatingAnchorRect" class="float-transcript-backdrop" @click.self="handleFloatingClose">
<div class="float-transcript-panel" :style="floatingStyle">
<div class="ft-header">
<div
v-if="floatingAgent"
class="ft-badge"
:style="{ background: floatingAgent.uiConfig?.gradient || floatingAgent.uiConfig?.color }"
>
{{ floatingAgent.uiConfig?.shortLabel }}
</div>
<span class="ft-label">{{ floatingAgent?.uiConfig?.label || floatingAgent?.name }}</span>
</div>
<TranscriptCard :voice="floatingVoice" @done="handleFloatingDone" />
</div>
</div>
</Transition>
</Teleport>
</template>
<style scoped>
@@ -281,10 +388,98 @@ onBeforeUnmount(() => {
pointer-events: none;
}
/* Floating transcript */
/* Full-viewport layer behind the panel; it receives the click-to-dismiss. */
.float-transcript-backdrop {
position: fixed;
inset: 0;
z-index: 9999;
}
/* Panel chrome: translucent dark surface with a blurred backdrop.
   Position and width come from the inline floatingStyle binding. */
.float-transcript-panel {
background: rgba(15, 10, 26, 0.9);
backdrop-filter: blur(20px);
-webkit-backdrop-filter: blur(20px);
border: 1px solid rgba(255, 255, 255, 0.1);
border-radius: 14px;
padding: 10px 12px;
box-shadow:
0 8px 32px rgba(0, 0, 0, 0.4),
0 0 0 1px rgba(255, 255, 255, 0.05) inset;
/* Scale animations grow from the bottom edge of the panel. */
transform-origin: bottom center;
}
/* Header row: agent badge followed by the agent label. */
.ft-header {
display: flex;
align-items: center;
gap: 8px;
margin-bottom: 4px;
}
/* Small square badge; its background is set inline in the template. */
.ft-badge {
width: 22px;
height: 22px;
border-radius: 7px;
display: flex;
align-items: center;
justify-content: center;
color: #fff;
font-size: 9px;
font-weight: 700;
/* Keep the badge at its fixed size when the label is long. */
flex-shrink: 0;
}
/* Muted agent name next to the badge. */
.ft-label {
font-size: 12px;
font-weight: 500;
color: rgba(255, 255, 255, 0.5);
font-family: system-ui, sans-serif;
}
/* Transition */
/* Classes for <Transition name="float-transcript">: the backdrop fades via
   opacity while the nested panel runs its own keyframe animation. */
.float-transcript-enter-active {
transition: opacity 0.2s ease;
}
.float-transcript-enter-active .float-transcript-panel {
animation: ft-enter 0.3s cubic-bezier(0.34, 1.56, 0.64, 1) both;
}
.float-transcript-enter-from {
opacity: 0;
}
.float-transcript-leave-active {
transition: opacity 0.15s ease;
}
.float-transcript-leave-active .float-transcript-panel {
animation: ft-leave 0.15s ease both;
}
.float-transcript-leave-to {
opacity: 0;
}
/* Panel rises 10px and scales up from 0.9 while fading in. */
@keyframes ft-enter {
0% { opacity: 0; transform: translateY(10px) scale(0.9); }
100% { opacity: 1; transform: translateY(0) scale(1); }
}
/* Panel drops 8px and scales down to 0.95 while fading out. */
@keyframes ft-leave {
0% { opacity: 1; transform: translateY(0) scale(1); }
100% { opacity: 0; transform: translateY(8px) scale(0.95); }
}
/* Viewports up to 768px wide. */
@media (max-width: 768px) {
.agent-bubbles {
bottom: 80px;
gap: 12px;
}
/* Stretch the panel edge to edge with 8px gutters. !important is needed
   because left and width are also set inline by floatingStyle; the inline
   bottom offset is left untouched. */
.float-transcript-panel {
left: 8px !important;
right: 8px;
width: auto !important;
}
}
</style>