feat: Add microphone selection and audio playback to FloatingVoice
- Add microphone device enumeration and a selector dropdown
- Show the current microphone name with a click-to-change UI
- Microphone selection is only available in Whisper GPU mode
- Add an audio playback button to replay the last recorded audio for debugging
- Improve dropdown animations with staggered item transitions
- Fix FloatingTerminal token request to type character by character
This commit is contained in:
@@ -384,7 +384,19 @@ function requestToken() {
|
||||
if (socket && socket.readyState === WebSocket.OPEN) {
|
||||
tokenBuffer = ''
|
||||
waitingForToken.value = true
|
||||
socket.send(JSON.stringify({ type: 'input', data: 'genera token usando tu mcp\r' }))
|
||||
|
||||
// Send character by character then Enter (same as VoiceFloat)
|
||||
const text = 'genera token usando tu mcp'
|
||||
const chars = (text + '\r').split('')
|
||||
let i = 0
|
||||
const typeChar = () => {
|
||||
if (i < chars.length && socket && socket.readyState === WebSocket.OPEN) {
|
||||
socket.send(JSON.stringify({ type: 'input', data: chars[i] }))
|
||||
i++
|
||||
setTimeout(typeChar, 15)
|
||||
}
|
||||
}
|
||||
typeChar()
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -62,6 +62,86 @@ let chunkInterval: number | null = null
|
||||
const CHUNK_INTERVAL_MS = 3000 // Send audio every 3 seconds
|
||||
let mediaStream: MediaStream | null = null
|
||||
|
||||
// ============ MICROPHONE SELECTION ============
|
||||
const audioDevices = ref<MediaDeviceInfo[]>([])
|
||||
const selectedDeviceId = ref<string>('')
|
||||
const showMicSelector = ref(false)
|
||||
|
||||
// ============ AUDIO PLAYBACK (DEBUG) ============
|
||||
const lastAudioUrl = ref<string>('')
|
||||
const isPlayingAudio = ref(false)
|
||||
let audioElement: HTMLAudioElement | null = null
|
||||
|
||||
/**
 * Toggle playback of the most recently saved recording (debug aid).
 *
 * - Does nothing when no recording has been saved yet.
 * - While a clip is playing, a call stops it and rewinds to the start.
 * - Otherwise a fresh Audio element is created for `lastAudioUrl` and started.
 *
 * `isPlayingAudio` only flips to true once the element fires `play`, which
 * happens asynchronously. Any element left over from an earlier call is
 * therefore silenced and detached before a new one is created, so two quick
 * clicks cannot produce overlapping playback or stale handlers that overwrite
 * the shared flag.
 */
function playLastAudio() {
  if (!lastAudioUrl.value) return

  // Second click while playing acts as a stop button.
  if (isPlayingAudio.value && audioElement) {
    audioElement.pause()
    audioElement.currentTime = 0
    isPlayingAudio.value = false
    return
  }

  // Retire any previous element: drop its handlers first so the pause()
  // below (or a late event) cannot touch isPlayingAudio on its behalf.
  if (audioElement) {
    audioElement.onplay = null
    audioElement.onended = null
    audioElement.onpause = null
    audioElement.pause()
  }

  const player = new Audio(lastAudioUrl.value)
  audioElement = player

  player.onplay = () => { isPlayingAudio.value = true }
  player.onended = () => { isPlayingAudio.value = false }
  player.onpause = () => { isPlayingAudio.value = false }

  player.play().catch((e: unknown) => {
    // A superseded element's play() is rejected when it gets paused above;
    // that is expected and must not disturb the state of the current one.
    if (audioElement !== player) return
    console.error('[Voice] Failed to play audio:', e)
    isPlayingAudio.value = false
  })
}
|
||||
|
||||
/**
 * Remember a recording so the debug "play" button can replay it.
 *
 * @param blob - Recorded audio to expose through an object URL.
 */
function saveAudioForPlayback(blob: Blob) {
  const previousUrl = lastAudioUrl.value

  // Release the object URL of the clip being replaced to free its memory.
  if (previousUrl) URL.revokeObjectURL(previousUrl)

  lastAudioUrl.value = URL.createObjectURL(blob)
}
|
||||
|
||||
/**
 * Label for the currently selected microphone.
 *
 * Yields 'Default' when nothing is selected, the device's own label when the
 * selected id is found in `audioDevices`, and 'Microphone' when the device is
 * unknown or reports an empty label.
 */
const currentMicName = computed(() => {
  const chosenId = selectedDeviceId.value
  if (!chosenId) return 'Default'

  for (const candidate of audioDevices.value) {
    if (candidate.deviceId === chosenId) {
      // `||` on purpose: an empty label must fall back as well.
      return candidate.label || 'Microphone'
    }
  }
  return 'Microphone'
})
|
||||
|
||||
/**
 * Populate `audioDevices` with the available audio inputs and, when no
 * microphone has been chosen yet, select the first one.
 *
 * Failures (for example a denied permission prompt) are logged and otherwise
 * ignored, leaving the current state untouched.
 */
async function loadAudioDevices() {
  try {
    // Ask for microphone access up front so the enumerated devices carry
    // labels; the stream itself is not needed and is released right away.
    const permissionStream = await navigator.mediaDevices.getUserMedia({ audio: true })
    for (const track of permissionStream.getTracks()) {
      track.stop()
    }

    const allDevices = await navigator.mediaDevices.enumerateDevices()
    const inputs = allDevices.filter(({ kind }) => kind === 'audioinput')
    audioDevices.value = inputs

    // Keep an existing choice; otherwise default to the first input.
    const nothingSelected = !selectedDeviceId.value
    if (nothingSelected && inputs.length > 0) {
      selectedDeviceId.value = inputs[0]?.deviceId || ''
    }
  } catch (e) {
    console.error('[Voice] Failed to enumerate devices:', e)
  }
}
|
||||
|
||||
/**
 * Make `deviceId` the active microphone and close the selector dropdown.
 *
 * When a recording is in progress it is stopped and started again shortly
 * afterwards so that capture continues on the newly chosen device.
 *
 * @param deviceId - `deviceId` of the chosen audio input.
 */
function selectMicrophone(deviceId: string) {
  selectedDeviceId.value = deviceId
  showMicSelector.value = false

  if (!isRecording.value) return

  // Restart after a short pause rather than immediately.
  const restartDelayMs = 100
  stopRecording()
  setTimeout(() => {
    startRecording()
  }, restartDelayMs)
}
|
||||
|
||||
/**
 * Document-level click handler that closes the microphone dropdown when the
 * click landed outside both the microphone bar and the dropdown itself.
 *
 * @param e - The click event received by the document listener.
 */
function closeMicSelector(e: MouseEvent) {
  const target = e.target

  // `e.target` is an EventTarget that may be null or a non-element (e.g. the
  // document for a synthetic event). Narrow it at runtime instead of asserting
  // a type, and treat anything that is not an element as an outside click.
  const clickedMicUi =
    target instanceof Element && target.closest('.mic-bar, .mic-dropdown') !== null

  if (!clickedMicUi) {
    showMicSelector.value = false
  }
}
|
||||
|
||||
const displayText = computed(() => {
|
||||
if (interimTranscript.value) {
|
||||
return transcript.value + ' ' + interimTranscript.value
|
||||
@@ -334,7 +414,10 @@ function disconnectWhisperSocket() {
|
||||
|
||||
async function startWhisperRecording() {
|
||||
try {
|
||||
mediaStream = await navigator.mediaDevices.getUserMedia({ audio: true })
|
||||
const audioConstraints: MediaTrackConstraints = selectedDeviceId.value
|
||||
? { deviceId: { exact: selectedDeviceId.value } }
|
||||
: {}
|
||||
mediaStream = await navigator.mediaDevices.getUserMedia({ audio: audioConstraints })
|
||||
|
||||
mediaRecorder = new MediaRecorder(mediaStream, {
|
||||
mimeType: 'audio/webm;codecs=opus'
|
||||
@@ -389,6 +472,8 @@ function sendAudioChunk(isFinal: boolean) {
|
||||
if (isFinal) {
|
||||
audioChunks = []
|
||||
lastTranscriptLength = 0
|
||||
// Save audio for playback debugging
|
||||
saveAudioForPlayback(audioBlob)
|
||||
}
|
||||
|
||||
const reader = new FileReader()
|
||||
@@ -743,6 +828,9 @@ onMounted(async () => {
|
||||
document.addEventListener('keydown', handleKeyDown, { capture: true })
|
||||
document.addEventListener('keyup', handleKeyUp, { capture: true })
|
||||
|
||||
// Load available audio devices
|
||||
await loadAudioDevices()
|
||||
|
||||
// Check Whisper status on mount
|
||||
const status = await checkWhisperStatus()
|
||||
|
||||
@@ -765,10 +853,19 @@ onBeforeUnmount(() => {
|
||||
if (mediaStream) {
|
||||
mediaStream.getTracks().forEach(track => track.stop())
|
||||
}
|
||||
// Clean up audio playback
|
||||
if (audioElement) {
|
||||
audioElement.pause()
|
||||
audioElement = null
|
||||
}
|
||||
if (lastAudioUrl.value) {
|
||||
URL.revokeObjectURL(lastAudioUrl.value)
|
||||
}
|
||||
document.removeEventListener('keydown', handleKeyDown, { capture: true })
|
||||
document.removeEventListener('keyup', handleKeyUp, { capture: true })
|
||||
document.removeEventListener('mousemove', onDrag)
|
||||
document.removeEventListener('mouseup', stopDrag)
|
||||
document.removeEventListener('click', closeMicSelector)
|
||||
if (holdTimeout) clearTimeout(holdTimeout)
|
||||
})
|
||||
|
||||
@@ -776,8 +873,11 @@ onBeforeUnmount(() => {
|
||||
// React to the panel opening/closing: manage the socket connection and the
// document-level "click outside" listener that dismisses the mic dropdown.
watch(isOpen, (open) => {
  if (!open) {
    // Closing: drop the connection, hide the dropdown, stop listening.
    disconnectSocket()
    showMicSelector.value = false
    document.removeEventListener('click', closeMicSelector)
    return
  }

  // Opening: connect and start watching for outside clicks.
  connectSocket()
  document.addEventListener('click', closeMicSelector)
})
|
||||
|
||||
@@ -840,6 +940,46 @@ defineExpose({
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<!-- Microphone Info Bar -->
|
||||
<div
|
||||
class="mic-bar"
|
||||
:class="{ disabled: !useWhisper }"
|
||||
@click.stop="useWhisper ? (showMicSelector = !showMicSelector) : null"
|
||||
:title="useWhisper ? 'Click to change microphone' : 'Microphone selection only available with Whisper GPU'"
|
||||
>
|
||||
<svg width="10" height="10" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2">
|
||||
<path d="M12 1a3 3 0 0 0-3 3v8a3 3 0 0 0 6 0V4a3 3 0 0 0-3-3z"/>
|
||||
</svg>
|
||||
<span class="mic-name">{{ useWhisper ? currentMicName : 'System Default' }}</span>
|
||||
<template v-if="useWhisper">
|
||||
<svg class="chevron" :class="{ open: showMicSelector }" width="8" height="8" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="3">
|
||||
<polyline points="6 9 12 15 18 9"/>
|
||||
</svg>
|
||||
</template>
|
||||
<span v-else class="mic-note">(Web API)</span>
|
||||
</div>
|
||||
|
||||
<!-- Microphone Selector Dropdown -->
|
||||
<Transition name="dropdown">
  <div v-if="showMicSelector" class="mic-dropdown">
    <!-- One row per audio input. The v-for index feeds the fallback label
         ("Microphone N") for devices that report no label, avoiding an
         indexOf() scan of the list for every row. -->
    <div
      v-for="(device, index) in audioDevices"
      :key="device.deviceId"
      class="mic-option"
      :class="{ active: device.deviceId === selectedDeviceId }"
      @click.stop="selectMicrophone(device.deviceId)"
    >
      <!-- Check mark on the currently selected device -->
      <svg v-if="device.deviceId === selectedDeviceId" width="10" height="10" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="3">
        <polyline points="20 6 9 17 4 12"/>
      </svg>
      <span>{{ device.label || `Microphone ${index + 1}` }}</span>
    </div>
    <!-- Placeholder row when no audio inputs were enumerated -->
    <div v-if="audioDevices.length === 0" class="mic-option disabled">
      No microphones found
    </div>
  </div>
</Transition>
|
||||
|
||||
<!-- Content -->
|
||||
<div class="content">
|
||||
<div class="transcript" :class="{ empty: !animatedTranscript && !interimTranscript }">
|
||||
@@ -879,6 +1019,22 @@ defineExpose({
|
||||
</svg>
|
||||
</button>
|
||||
|
||||
<button
|
||||
class="action-btn play"
|
||||
:class="{ playing: isPlayingAudio }"
|
||||
@click="playLastAudio"
|
||||
title="Play last audio"
|
||||
:disabled="!lastAudioUrl"
|
||||
>
|
||||
<svg v-if="!isPlayingAudio" width="14" height="14" viewBox="0 0 24 24" fill="currentColor">
|
||||
<polygon points="5 3 19 12 5 21 5 3"/>
|
||||
</svg>
|
||||
<svg v-else width="14" height="14" viewBox="0 0 24 24" fill="currentColor">
|
||||
<rect x="6" y="4" width="4" height="16"/>
|
||||
<rect x="14" y="4" width="4" height="16"/>
|
||||
</svg>
|
||||
</button>
|
||||
|
||||
<button
|
||||
class="action-btn send"
|
||||
@click="sendTranscript"
|
||||
@@ -910,6 +1066,7 @@ defineExpose({
|
||||
}
|
||||
|
||||
.glass {
|
||||
position: relative;
|
||||
display: flex;
|
||||
flex-direction: column;
|
||||
background: rgba(200, 215, 235, 0.35);
|
||||
@@ -921,7 +1078,7 @@ defineExpose({
|
||||
0 0 0 1px rgba(80, 120, 180, 0.25),
|
||||
0 6px 24px rgba(0, 0, 0, 0.25),
|
||||
inset 0 1px 0 rgba(255, 255, 255, 0.6);
|
||||
overflow: hidden;
|
||||
overflow: visible;
|
||||
}
|
||||
|
||||
.titlebar {
|
||||
@@ -1051,6 +1208,149 @@ defineExpose({
|
||||
color: #fff;
|
||||
}
|
||||
|
||||
/* Microphone Bar */
|
||||
.mic-bar {
|
||||
display: flex;
|
||||
align-items: center;
|
||||
gap: 6px;
|
||||
padding: 6px 10px;
|
||||
background: rgba(0, 0, 0, 0.05);
|
||||
border-bottom: 1px solid rgba(255, 255, 255, 0.2);
|
||||
cursor: pointer;
|
||||
font-size: 10px;
|
||||
color: #444;
|
||||
transition: background 0.15s;
|
||||
}
|
||||
|
||||
.mic-bar:hover:not(.disabled) {
|
||||
background: rgba(0, 0, 0, 0.1);
|
||||
}
|
||||
|
||||
.mic-bar.disabled {
|
||||
cursor: default;
|
||||
opacity: 0.7;
|
||||
}
|
||||
|
||||
.mic-note {
|
||||
font-size: 9px;
|
||||
color: #888;
|
||||
font-style: italic;
|
||||
}
|
||||
|
||||
.mic-name {
|
||||
flex: 1;
|
||||
overflow: hidden;
|
||||
text-overflow: ellipsis;
|
||||
white-space: nowrap;
|
||||
font-weight: 500;
|
||||
}
|
||||
|
||||
.mic-bar .chevron {
|
||||
transition: transform 0.2s;
|
||||
}
|
||||
|
||||
.mic-bar .chevron.open {
|
||||
transform: rotate(180deg);
|
||||
}
|
||||
|
||||
/* Microphone Dropdown */
|
||||
.mic-dropdown {
|
||||
position: absolute;
|
||||
top: 52px;
|
||||
left: 0;
|
||||
right: 0;
|
||||
background: rgba(255, 255, 255, 0.95);
|
||||
backdrop-filter: blur(12px);
|
||||
border: 1px solid rgba(0, 0, 0, 0.1);
|
||||
border-radius: 0 0 6px 6px;
|
||||
box-shadow: 0 4px 12px rgba(0, 0, 0, 0.15);
|
||||
z-index: 10;
|
||||
max-height: 200px;
|
||||
overflow-y: auto;
|
||||
}
|
||||
|
||||
.mic-option {
|
||||
display: flex;
|
||||
align-items: center;
|
||||
gap: 8px;
|
||||
padding: 8px 12px;
|
||||
font-size: 11px;
|
||||
color: #333;
|
||||
cursor: pointer;
|
||||
transition: background 0.1s;
|
||||
}
|
||||
|
||||
.mic-option:hover:not(.disabled) {
|
||||
background: rgba(74, 153, 153, 0.1);
|
||||
}
|
||||
|
||||
.mic-option.active {
|
||||
background: rgba(74, 153, 153, 0.15);
|
||||
color: #2a7;
|
||||
font-weight: 500;
|
||||
}
|
||||
|
||||
.mic-option.disabled {
|
||||
color: #999;
|
||||
cursor: default;
|
||||
font-style: italic;
|
||||
}
|
||||
|
||||
.mic-option svg {
|
||||
flex-shrink: 0;
|
||||
color: #2a7;
|
||||
}
|
||||
|
||||
.mic-option span {
|
||||
overflow: hidden;
|
||||
text-overflow: ellipsis;
|
||||
white-space: nowrap;
|
||||
}
|
||||
|
||||
/* Staggered animation for mic options */
|
||||
.mic-dropdown .mic-option {
|
||||
animation: slideIn 0.2s ease forwards;
|
||||
opacity: 0;
|
||||
}
|
||||
|
||||
.mic-dropdown .mic-option:nth-child(1) { animation-delay: 0.02s; }
|
||||
.mic-dropdown .mic-option:nth-child(2) { animation-delay: 0.04s; }
|
||||
.mic-dropdown .mic-option:nth-child(3) { animation-delay: 0.06s; }
|
||||
.mic-dropdown .mic-option:nth-child(4) { animation-delay: 0.08s; }
|
||||
.mic-dropdown .mic-option:nth-child(5) { animation-delay: 0.10s; }
|
||||
|
||||
@keyframes slideIn {
|
||||
from {
|
||||
opacity: 0;
|
||||
transform: translateX(-8px);
|
||||
}
|
||||
to {
|
||||
opacity: 1;
|
||||
transform: translateX(0);
|
||||
}
|
||||
}
|
||||
|
||||
/* Dropdown transition */
|
||||
/*
 * Classes for the Vue transition named "dropdown".
 *
 * transform-origin lives on the *-active classes, which stay applied for the
 * whole enter/leave phase. Declaring it only on -enter-from / -leave-to would
 * make the origin itself change mid-transition (and be animated by
 * `transition: all`), so the scaleY would not stay anchored to the top edge.
 */
.dropdown-enter-active {
  transform-origin: top center;
  transition: all 0.2s cubic-bezier(0.34, 1.56, 0.64, 1);
}

.dropdown-leave-active {
  transform-origin: top center;
  transition: all 0.15s cubic-bezier(0.4, 0, 1, 1);
}

/* Starting state when the dropdown appears */
.dropdown-enter-from {
  opacity: 0;
  transform: translateY(-10px) scaleY(0.8);
}

/* Final state when the dropdown disappears */
.dropdown-leave-to {
  opacity: 0;
  transform: translateY(-6px) scaleY(0.9);
}
|
||||
|
||||
.content {
|
||||
padding: 12px;
|
||||
min-height: 80px;
|
||||
@@ -1156,6 +1456,22 @@ defineExpose({
|
||||
cursor: not-allowed;
|
||||
}
|
||||
|
||||
.action-btn.play {
|
||||
background: linear-gradient(180deg, #6b7280 0%, #4b5563 100%);
|
||||
border-color: #374151;
|
||||
color: #fff;
|
||||
}
|
||||
|
||||
.action-btn.play:hover:not(:disabled) {
|
||||
background: linear-gradient(180deg, #9ca3af 0%, #6b7280 100%);
|
||||
}
|
||||
|
||||
.action-btn.play.playing {
|
||||
background: linear-gradient(180deg, #f59e0b 0%, #d97706 100%);
|
||||
border-color: #b45309;
|
||||
animation: pulse 1s infinite;
|
||||
}
|
||||
|
||||
.action-btn.send {
|
||||
margin-left: auto;
|
||||
background: linear-gradient(180deg, #4a9 0%, #3a8 100%);
|
||||
|
||||
Reference in New Issue
Block a user