feat: Push-to-talk on voice FAB button

- Hold FAB to open panel and start recording immediately
- Release to stop recording and send after 1s buffer
- Orange pulsing animation when PTT active
- PTT also works on record button inside modal
- Added stopRecordingAndSend exposed method
This commit is contained in:
2026-02-14 04:51:50 -06:00
parent 12a95c6206
commit f9b5ad3db6
4 changed files with 263 additions and 71 deletions

View File

@@ -1,4 +1,67 @@
{
"permissions": {
"allow": [
"Skill(playwright-cli)",
"Bash(playwright-cli close:*)",
"Bash(playwright-cli open:*)",
"Bash(playwright-cli screenshot:*)",
"WebFetch(domain:github.com)",
"Bash(playwright-cli goto:*)",
"Bash(playwright-cli snapshot:*)",
"Bash(playwright-cli mousewheel:*)",
"Bash(playwright-cli eval:*)",
"Bash(playwright-cli tab-list:*)",
"Bash(playwright-cli tab-select:*)",
"Bash(playwright-cli click:*)",
"Bash(playwright-cli press:*)",
"WebFetch(domain:gitea.nucleoriofrio.com)",
"Bash(dir \"C:\\\\Users\\\\jodar\\\\agent-ui\")",
"WebSearch",
"Bash(cmd /c \"bun --version\")",
"Bash(powershell -Command \"bun --version\")",
"Bash(C:Usersjodar.bunbinbun.exe create vite . --template vue-ts)",
"mcp__agent-ui___webmcp_get-token",
"mcp__agent-ui___webmcp_quitar-tool",
"mcp__agent-ui__localhost_3000-render_html",
"mcp__agent-ui__localhost_4100-navigate_to",
"mcp__agent-ui__localhost_4100-get_design_tokens",
"mcp__agent-ui__localhost_4100-set_theme_variable",
"mcp__agent-ui__localhost_4100-list_available_tools",
"mcp__agent-ui__localhost_4100-switch_theme",
"mcp__agent-ui__localhost_4100-set_default_theme",
"mcp__agent-ui__localhost_4100-save_theme",
"mcp__agent-ui___webmcp_browser-info",
"mcp__agent-ui__localhost_4100-render_vue_component",
"Bash(bun remove:*)",
"Bash(bun add:*)",
"mcp__agent-ui__localhost_4100-confetti",
"mcp__agent-ui__localhost_4100-get_current_page",
"mcp__agent-ui___webmcp_server-info",
"mcp__agent-ui__localhost_4100-toggle_pin_tool",
"mcp__agent-ui__localhost_4100-pin_tool",
"Bash(npx vue-tsc:*)",
"mcp__agent-ui__localhost_4100-activate_tool",
"mcp__agent-ui__localhost_4100-terminal_open",
"mcp__agent-ui__localhost_4100-terminal_move",
"mcp__agent-ui__localhost_4100-terminal_resize",
"mcp__agent-ui__localhost_4100-terminal_toggle",
"mcp__agent-ui__localhost_4100-terminal_close",
"mcp__agent-ui__localhost_4100-bubbleResponse",
"mcp__agent-ui__localhost_4100-notificar",
"mcp__agent-ui__localhost_4100-enviar_al_panel",
"mcp__agent-ui__localhost_4100-render_html",
"mcp__agent-ui__localhost_4100-load_vue_component",
"mcp__agent-ui__localhost_4100-page_refresh",
"WebFetch(domain:docs.anthropic.com)",
"mcp__agent-ui__z590_nucleoriofrio_com-bubbleResponse",
"Bash(git add:*)",
"Bash(git commit:*)"
]
},
"enableAllProjectMcpServers": true,
"enabledMcpjsonServers": [
"agent-ui"
],
"hooks": {
"UserPromptSubmit": [
{
@@ -125,65 +188,5 @@
]
}
]
},
"permissions": {
"allow": [
"Skill(playwright-cli)",
"Bash(playwright-cli close:*)",
"Bash(playwright-cli open:*)",
"Bash(playwright-cli screenshot:*)",
"WebFetch(domain:github.com)",
"Bash(playwright-cli goto:*)",
"Bash(playwright-cli snapshot:*)",
"Bash(playwright-cli mousewheel:*)",
"Bash(playwright-cli eval:*)",
"Bash(playwright-cli tab-list:*)",
"Bash(playwright-cli tab-select:*)",
"Bash(playwright-cli click:*)",
"Bash(playwright-cli press:*)",
"WebFetch(domain:gitea.nucleoriofrio.com)",
"Bash(dir \"C:\\\\Users\\\\jodar\\\\agent-ui\")",
"WebSearch",
"Bash(cmd /c \"bun --version\")",
"Bash(powershell -Command \"bun --version\")",
"Bash(C:Usersjodar.bunbinbun.exe create vite . --template vue-ts)",
"mcp__agent-ui___webmcp_get-token",
"mcp__agent-ui___webmcp_quitar-tool",
"mcp__agent-ui__localhost_3000-render_html",
"mcp__agent-ui__localhost_4100-navigate_to",
"mcp__agent-ui__localhost_4100-get_design_tokens",
"mcp__agent-ui__localhost_4100-set_theme_variable",
"mcp__agent-ui__localhost_4100-list_available_tools",
"mcp__agent-ui__localhost_4100-switch_theme",
"mcp__agent-ui__localhost_4100-set_default_theme",
"mcp__agent-ui__localhost_4100-save_theme",
"mcp__agent-ui___webmcp_browser-info",
"mcp__agent-ui__localhost_4100-render_vue_component",
"Bash(bun remove:*)",
"Bash(bun add:*)",
"mcp__agent-ui__localhost_4100-confetti",
"mcp__agent-ui__localhost_4100-get_current_page",
"mcp__agent-ui___webmcp_server-info",
"mcp__agent-ui__localhost_4100-toggle_pin_tool",
"mcp__agent-ui__localhost_4100-pin_tool",
"Bash(npx vue-tsc:*)",
"mcp__agent-ui__localhost_4100-activate_tool",
"mcp__agent-ui__localhost_4100-terminal_open",
"mcp__agent-ui__localhost_4100-terminal_move",
"mcp__agent-ui__localhost_4100-terminal_resize",
"mcp__agent-ui__localhost_4100-terminal_toggle",
"mcp__agent-ui__localhost_4100-terminal_close",
"mcp__agent-ui__localhost_4100-bubbleResponse",
"mcp__agent-ui__localhost_4100-notificar",
"mcp__agent-ui__localhost_4100-enviar_al_panel",
"mcp__agent-ui__localhost_4100-render_html",
"mcp__agent-ui__localhost_4100-load_vue_component",
"mcp__agent-ui__localhost_4100-page_refresh",
"WebFetch(domain:docs.anthropic.com)"
]
},
"enableAllProjectMcpServers": true,
"enabledMcpjsonServers": [
"agent-ui"
]
}
}

View File

@@ -63,8 +63,14 @@ function clearDebugLogs() {
}
const terminalRef = ref<InstanceType<typeof FloatingTerminal> | null>(null)
const responseRef = ref<InstanceType<typeof FloatingResponse> | null>(null)
const voiceRef = ref<InstanceType<typeof FloatingVoice> | null>(null)
const canvasStore = useCanvasStore()
// Voice FAB push-to-talk state
// True while a push-to-talk gesture on the voice FAB is in progress; also drives
// the `ptt-active` class on the FAB button.
const voicePTTActive = ref(false)
// Set on touchstart so the click handler can tell a touch-originated click from a
// plain click and ignore it; reset shortly after the touch ends.
let voiceTouchStarted = false
// Handle of the delayed stop-and-send timer scheduled when the FAB is released
// (null until a release has been scheduled).
let voicePTTTimeout: number | null = null
// Claude status state (for FAB animations)
type ClaudeStatus = 'idle' | 'processing' | 'toolUse' | 'toolDone' | 'reading' | 'writing' | 'sessionStart' | 'subagentStart' | 'subagentStop' | 'notification' | 'permissionRequest' | 'thinking'
@@ -90,6 +96,45 @@ function hardRefresh() {
location.reload()
}
// Voice FAB push-to-talk handlers
/**
 * Click handler for the voice FAB.
 *
 * A click that arrives while the touch flag is still set belongs to a
 * push-to-talk gesture: it only consumes the flag. Any other click toggles the
 * voice panel open/closed.
 */
function handleVoiceFabClick() {
  const cameFromTouch = voiceTouchStarted
  voiceTouchStarted = false

  if (!cameFromTouch) {
    // Plain click: flip the panel visibility
    showVoice.value = !showVoice.value
  }
}
/**
 * Touch-start handler for the voice FAB: begins a push-to-talk gesture.
 *
 * Marks the gesture as active, opens the voice panel and asks the voice
 * component to start recording.
 *
 * @param e - The touchstart event; its default action is prevented.
 */
function handleVoiceFabTouchStart(e: TouchEvent) {
  e.preventDefault()

  // Releasing the FAB schedules a delayed stop-and-send. If the user presses
  // again before that timer fires, cancel it so the stale timer does not stop
  // the recording in the middle of the new hold.
  if (voicePTTTimeout !== null) {
    clearTimeout(voicePTTTimeout)
    voicePTTTimeout = null
  }

  voiceTouchStarted = true
  voicePTTActive.value = true

  // Open panel and start recording
  showVoice.value = true

  // Wait a moment for panel to open, then start recording
  setTimeout(() => {
    voiceRef.value?.startRecording()
  }, 100)
}
/**
 * Touch-end / touch-cancel handler for the voice FAB: ends a push-to-talk gesture.
 *
 * Does nothing (beyond preventing the default action) when no push-to-talk
 * gesture is active. Otherwise schedules a stop-and-send one second later and
 * resets the touch flag shortly after the release.
 *
 * @param e - The touchend/touchcancel event; its default action is prevented.
 */
function handleVoiceFabTouchEnd(e: TouchEvent) {
  e.preventDefault()
  if (!voicePTTActive.value) return

  // voicePTTActive stays true until the buffer elapses, so a second end/cancel
  // event for the same gesture would otherwise stack another timer and trigger
  // stopRecordingAndSend twice. Keep only the most recent one.
  if (voicePTTTimeout !== null) {
    clearTimeout(voicePTTTimeout)
  }

  // Add buffer before stopping
  voicePTTTimeout = window.setTimeout(() => {
    voicePTTTimeout = null // timer has fired; nothing left to cancel
    voiceRef.value?.stopRecordingAndSend()
    voicePTTActive.value = false
  }, 1000)

  // Keep the flag set briefly so a click fired right after the touch is ignored
  setTimeout(() => { voiceTouchStarted = false }, 100)
}
function connectStatusWs() {
if (statusWs?.readyState === WebSocket.OPEN) return
@@ -418,9 +463,12 @@ watch(() => route.name, (newPage) => {
<!-- Voice FAB Button -->
<button
class="voice-fab"
:class="{ active: showVoice, 'sheet-open': showTerminal || showVoice }"
@click="showVoice = !showVoice"
title="Voice Input"
:class="{ active: showVoice, 'sheet-open': showTerminal || showVoice, 'ptt-active': voicePTTActive }"
@click="handleVoiceFabClick"
@touchstart="handleVoiceFabTouchStart"
@touchend="handleVoiceFabTouchEnd"
@touchcancel="handleVoiceFabTouchEnd"
title="Voice Input (mantén presionado para PTT)"
>
<svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round">
<path d="M12 1a3 3 0 0 0-3 3v8a3 3 0 0 0 6 0V4a3 3 0 0 0-3-3z"/>
@@ -437,7 +485,7 @@ watch(() => route.name, (newPage) => {
<FloatingResponse ref="responseRef" />
<!-- Floating Voice Input -->
<FloatingVoice v-model="showVoice" />
<FloatingVoice ref="voiceRef" v-model="showVoice" />
<!-- Debug Console Panel -->
<Teleport to="body">
@@ -944,6 +992,19 @@ watch(() => route.name, (newPage) => {
box-shadow: 0 8px 24px rgba(239, 68, 68, 0.5);
}
/* Voice FAB PTT active - recording in progress.
   Applied through the `ptt-active` class while push-to-talk is held on the FAB:
   orange gradient, glow, enlarged button, and a fast repeating pulse. */
.voice-fab.ptt-active {
background: linear-gradient(135deg, #f97316 0%, #ea580c 100%);
box-shadow: 0 0 30px rgba(249, 115, 22, 0.7);
transform: scale(1.15);
animation: ptt-pulse 0.5s ease-in-out infinite;
}
/* Pulse used by .voice-fab.ptt-active: only the glow (box-shadow) is animated,
   growing at the midpoint and returning to the resting value. */
@keyframes ptt-pulse {
0%, 100% { box-shadow: 0 0 30px rgba(249, 115, 22, 0.7); }
50% { box-shadow: 0 0 50px rgba(249, 115, 22, 0.9); }
}
@media (max-width: 768px) {
.terminal-fab {
bottom: 16px;

View File

@@ -95,12 +95,14 @@ const showMicSelector = ref(false)
// ============ MOBILE DETECTION & AUDIO FORMAT ============
// Reactive state for mobile detection, recording format, the mobile sheet layout
// and mobile push-to-talk.
const isMobile = ref(false) // selects the mobile wording of the placeholder and record-button title
const isAndroid = ref(false)
const isMobilePTT = ref(false) // Mobile push-to-talk active (set on touchstart, cleared after the release buffer)
const supportedMimeType = ref('audio/webm;codecs=opus') // initial value; NOTE(review): presumably refined during format detection - confirm
const sheetHeight = ref(45) // percentage of viewport for mobile
const isDraggingSheet = ref(false)
const sheetDragStart = ref({ y: 0, height: 0 })
const keyboardHeight = ref(0)
const snapPoints = [25, 45, 70] // collapsed, default, expanded
// Handle of the delayed stop timer scheduled when the record button is released;
// cancelled on the next touchstart.
let mobilePTTTimeout: number | null = null
function checkMobile() {
const ua = navigator.userAgent
@@ -537,10 +539,20 @@ function connectWhisperSocket() {
}
if (whisperSocket?.readyState === WebSocket.OPEN) return
console.log('[Voice] Connecting to Whisper server...')
console.log('[Voice] Connecting to Whisper server at:', WHISPER_WS_URL)
whisperSocket = new WebSocket(WHISPER_WS_URL)
// Connection timeout
const connectionTimeout = setTimeout(() => {
if (whisperSocket && whisperSocket.readyState !== WebSocket.OPEN) {
console.error('[Voice] Whisper connection timeout (10s)')
whisperSocket.close()
whisperReady.value = false
}
}, 10000)
whisperSocket.onopen = () => {
clearTimeout(connectionTimeout)
console.log('[Voice] Whisper WebSocket connected')
whisperReady.value = true
}
@@ -799,6 +811,75 @@ function stopRecording() {
interimTranscript.value = ''
}
// ============ MOBILE PUSH-TO-TALK ============
let touchStarted = false
/**
 * Click handler for the record button.
 *
 * When the click arrives while the touch flag is still set it is treated as the
 * tail of a push-to-talk touch: the flag is consumed and nothing else happens
 * (prevents a double trigger). Otherwise recording is toggled.
 */
function handleRecClick() {
  const cameFromTouch = touchStarted
  touchStarted = false
  if (cameFromTouch) return

  // Normal click behavior
  toggleRecording()
}
/**
 * Touch-start handler for the record button: begins mobile push-to-talk.
 *
 * Cancels a still-pending release timer, flags push-to-talk as active and
 * starts recording unless a recording is already running.
 *
 * @param e - The touchstart event; its default action is prevented.
 */
function handleRecTouchStart(e: TouchEvent) {
  e.preventDefault()
  touchStarted = true

  // A previous release may still be waiting out its buffer: drop it
  const pendingStop = mobilePTTTimeout
  if (pendingStop) {
    clearTimeout(pendingStop)
    mobilePTTTimeout = null
  }

  isMobilePTT.value = true
  isPushToTalk.value = true

  // Begin capturing right away unless we already are
  const alreadyRecording = isRecording.value
  if (!alreadyRecording) startRecording()

  console.log('[Voice] Mobile PTT started')
}
/**
 * Touch-end / touch-cancel handler for the record button: ends mobile push-to-talk.
 *
 * Does nothing (beyond preventing the default action) when mobile push-to-talk
 * is not active. Otherwise, after a 1 s buffer, it stops recording and then
 * either flags that the Whisper result should be sent when it arrives, or (Web
 * Speech path) sends the transcript after a short delay. If the Web Speech
 * transcript is empty, push-to-talk mode is switched off instead.
 *
 * @param e - The touchend/touchcancel event; its default action is prevented.
 */
function handleRecTouchEnd(e: TouchEvent) {
  e.preventDefault()
  if (!isMobilePTT.value) return

  console.log('[Voice] Mobile PTT released')

  // isMobilePTT stays true until the buffer elapses, so a second end/cancel
  // event for the same gesture would otherwise stack another timer and run the
  // stop-and-send sequence twice. Keep only the most recent one.
  if (mobilePTTTimeout !== null) {
    clearTimeout(mobilePTTTimeout)
  }

  // Add 1s buffer before stopping (to capture trailing words)
  mobilePTTTimeout = window.setTimeout(() => {
    mobilePTTTimeout = null // timer has fired; nothing left to cancel
    stopRecording()
    isMobilePTT.value = false

    // Wait a moment for final transcription, then send
    setTimeout(() => {
      if (useWhisper.value) {
        // Whisper: wait for server response
        // NOTE(review): pendingWhisperSend is presumably consumed by the Whisper
        // message handler, which is not visible here - confirm.
        pendingWhisperSend = true
        console.log('[Voice] Waiting for Whisper transcription...')
        return
      }

      // Web Speech API: send after short delay
      setTimeout(() => {
        if (transcript.value.trim()) {
          sendTranscriptAndClose()
        } else {
          isPushToTalk.value = false
        }
      }, 300)
    }, 200)
  }, 1000)

  // Reset touch flag after a short delay
  setTimeout(() => { touchStarted = false }, 100)
}
function clearTranscript() {
transcript.value = ''
interimTranscript.value = ''
@@ -1139,12 +1220,34 @@ watch(isOpen, (open) => {
}
})
// Stop recording and send (for external PTT control)
/**
 * Stops the current recording and arranges for the transcript to be sent.
 * Exposed so a parent component can drive push-to-talk from outside.
 *
 * After a 200 ms pause for the final transcription:
 * - Whisper mode: raises the pending-send flag and logs that it is waiting.
 * - Otherwise (Web Speech API): 300 ms later, sends and closes if the transcript
 *   contains non-whitespace text.
 */
function stopRecordingAndSend() {
  // Web Speech API: send only when something was actually transcribed
  const sendIfTranscribed = () => {
    const hasText = transcript.value.trim().length > 0
    if (hasText) sendTranscriptAndClose()
  }

  const finalize = () => {
    if (!useWhisper.value) {
      setTimeout(sendIfTranscribed, 300)
      return
    }
    // Whisper: wait for server response
    pendingWhisperSend = true
    console.log('[Voice] Waiting for Whisper transcription...')
  }

  stopRecording()
  setTimeout(finalize, 200)
}
// Expose methods
// Public surface available to a parent through a template ref on this component.
defineExpose({
// Show the voice panel
open: () => { isOpen.value = true },
close,
startRecording,
stopRecording,
// Stop, then send the transcript once it is available (external push-to-talk)
stopRecordingAndSend,
// Current transcript text
getTranscript: () => transcript.value
})
</script>
@@ -1264,7 +1367,7 @@ defineExpose({
<span class="final">{{ animatedTranscript }}</span><span class="cursor" v-if="animatedTranscript && animatedTranscript.length < transcript.length">|</span>
<span class="interim">{{ interimTranscript }}</span>
<span v-if="!animatedTranscript && !interimTranscript" class="placeholder">
Presiona el micrófono o mantén Ctrl+Space...
{{ isMobile ? 'Mantén presionado el micrófono para grabar...' : 'Presiona el micrófono o mantén Ctrl+Space...' }}
</span>
</div>
@@ -1275,9 +1378,12 @@ defineExpose({
<div class="controls">
<button
class="rec-btn"
:class="{ active: isRecording }"
@click="toggleRecording"
:title="isRecording ? 'Stop' : 'Record'"
:class="{ active: isRecording, ptt: isMobilePTT }"
@click="handleRecClick"
@touchstart="handleRecTouchStart"
@touchend="handleRecTouchEnd"
@touchcancel="handleRecTouchEnd"
:title="isRecording ? 'Stop' : (isMobile ? 'Mantén presionado para grabar' : 'Record')"
>
<svg v-if="!isRecording" width="16" height="16" viewBox="0 0 24 24" fill="currentColor">
<path d="M12 1a3 3 0 0 0-3 3v8a3 3 0 0 0 6 0V4a3 3 0 0 0-3-3z"/>
@@ -1716,6 +1822,28 @@ defineExpose({
animation: pulse 1s infinite;
}
/* Mobile PTT - larger button with special style.
   Applied through the `ptt` class while mobile push-to-talk is active:
   orange gradient, white icon, enlarged button and glow. */
.rec-btn.ptt {
background: linear-gradient(180deg, #f97316 0%, #ea580c 100%);
border-color: #c2410c;
color: #fff;
transform: scale(1.15);
box-shadow: 0 0 20px rgba(249, 115, 22, 0.6);
}
/* Make the record button and its icon bigger on coarse-pointer (touch) devices.
   The query matches on pointer accuracy, not on viewport width. */
@media (pointer: coarse) {
.rec-btn {
width: 52px;
height: 52px;
}
.rec-btn svg {
width: 22px;
height: 22px;
}
}
.action-btn {
height: 28px;
padding: 0 10px;

View File

@@ -77,7 +77,7 @@ def convert_audio_to_wav(input_data: bytes, input_format: str = "webm") -> bytes
pass
# Configuration
HOST = "localhost"
HOST = "0.0.0.0" # Listen on all interfaces (needed for Traefik proxy)
PORT = 4104
MODEL_SIZE = "large-v3" # Best standard model for Spanish
DEVICE = "cuda" # cuda or cpu