agent-ui/frontend/src/components/FloatingVoice.vue

<script setup lang="ts">
import { ref, computed, onMounted, onBeforeUnmount, watch } from 'vue'
import { useCanvasStore } from '../stores/canvas'

// Component contract: `modelValue` carries the panel's open/closed state (v-model).
const props = defineProps<{
  modelValue: boolean
}>()

// Emitted whenever this component wants to change the open/closed state.
const emit = defineEmits<{
  'update:modelValue': [value: boolean]
}>()

// Store instance; in this file it is used for `showNotification` toasts.
const canvasStore = useCanvasStore()

// Writable view over the v-model prop: reads come from `props.modelValue`,
// writes are forwarded to the parent through `update:modelValue`.
const isOpen = computed({
  get: () => props.modelValue,
  set: (val) => emit('update:modelValue', val)
})

// Recording state
// isRecording: true while either capture backend is active.
// transcript: finalized text; interimTranscript: not-yet-final Web Speech text.
// error: last user-visible error message ('' when none).
const isRecording = ref(false)
const transcript = ref('')
const interimTranscript = ref('')
const error = ref('')

// Typing animation state
// animatedTranscript is the progressively revealed copy of `transcript`;
// typingTimeout holds the pending animation timer, lastAnimatedLength the
// number of characters already revealed.
const animatedTranscript = ref('')
let typingTimeout: number | null = null
let lastAnimatedLength = 0

// Position and drag state
// `position` is only applied once `hasCustomPosition` is true (after a drag).
const position = ref({ x: 0, y: 0 })
const hasCustomPosition = ref(false)
const isDragging = ref(false)
const dragOffset = ref({ x: 0, y: 0 })
const containerRef = ref<HTMLElement | null>(null)

// Speech recognition (Web Speech API)
let recognition: SpeechRecognition | null = null

// WebSocket connection to terminal
// NOTE(review): the scheme is hard-coded to ws:// — presumably the page is
// never served over HTTPS; confirm.
const WS_URL = `ws://${window.location.hostname}:4103`
let socket: WebSocket | null = null
const connected = ref(false)

// Push-to-talk state (Ctrl+Space)
// keyDownTime is 0 when the shortcut is not held, otherwise the press timestamp.
// holdTimeout is the timer that starts recording after the key has been held.
let keyDownTime = 0
let holdTimeout: number | null = null
const isPushToTalk = ref(false)
let pendingWhisperSend = false // Flag to send transcript when Whisper responds

// ============ WHISPER MODE ============
// useWhisper: Whisper mode enabled (as reported by the backend status/toggle API).
// whisperReady: Whisper server/socket usable; whisperLoading: toggle or startup in progress.
const useWhisper = ref(false)
const whisperReady = ref(false)
const whisperLoading = ref(false)
const WHISPER_WS_URL = `ws://${window.location.hostname}:4104`
let whisperSocket: WebSocket | null = null
let mediaRecorder: MediaRecorder | null = null
// Recorded audio pieces for the current session, kept from the first piece on.
let audioChunks: Blob[] = []
// NOTE(review): this value is written in several places but never read in the visible code.
let lastTranscriptLength = 0 // Track length of last transcription to show only new text
let chunkInterval: number | null = null
const CHUNK_INTERVAL_MS = 3000 // Send audio every 3 seconds
let mediaStream: MediaStream | null = null

// ============ MICROPHONE SELECTION ============
// audioDevices: available audio inputs; selectedDeviceId: '' means "no explicit choice".
const audioDevices = ref<MediaDeviceInfo[]>([])
const selectedDeviceId = ref<string>('')
const showMicSelector = ref(false)

// ============ AUDIO PLAYBACK (DEBUG) ============
// lastAudioUrl is an object URL for the most recent final recording ('' when none).
const lastAudioUrl = ref<string>('')
const isPlayingAudio = ref(false)
let audioElement: HTMLAudioElement | null = null
// Timestamp (ms) of the last Whisper recording start; used to compute duration.
let recordingStartTime = 0

// Plays the most recent recording, or stops it when it is already playing.
function playLastAudio() {
  const url = lastAudioUrl.value
  if (!url) return

  // A second invocation during playback acts as a stop button.
  if (audioElement && isPlayingAudio.value) {
    audioElement.pause()
    audioElement.currentTime = 0
    isPlayingAudio.value = false
    return
  }

  const markStopped = () => { isPlayingAudio.value = false }

  const player = new Audio(url)
  audioElement = player
  player.onplay = () => { isPlayingAudio.value = true }
  player.onended = markStopped
  player.onpause = markStopped
  player.play().catch(err => {
    console.error('[Voice] Failed to play audio:', err)
    isPlayingAudio.value = false
  })
}

// Keeps `blob` available for local playback and forwards it to the backend.
function saveAudioForPlayback(blob: Blob) {
  // Release the object URL of the previous recording before replacing it.
  const previousUrl = lastAudioUrl.value
  if (previousUrl) URL.revokeObjectURL(previousUrl)

  lastAudioUrl.value = URL.createObjectURL(blob)

  // The same audio is also uploaded (fire-and-forget) as training data.
  saveRecordingToBackend(blob)
}

// Uploads a recording to the backend as base64 JSON, together with the current
// transcript, the microphone label and the recording duration.
// Best-effort: every failure (read, network, HTTP, JSON) is logged and swallowed,
// so the returned promise never rejects.
async function saveRecordingToBackend(blob: Blob) {
  try {
    const duration_ms = Date.now() - recordingStartTime

    // Wrap FileReader in a promise so read failures reach the catch below
    // instead of being lost inside an event callback.
    const dataUrl = await new Promise<string>((resolve, reject) => {
      const reader = new FileReader()
      reader.onload = () => resolve(reader.result as string)
      reader.onerror = () => reject(reader.error || new Error('Failed to read audio blob'))
      reader.readAsDataURL(blob)
    })

    // Data URL format is "data:<mime>;base64,<payload>"; only the payload is sent.
    const base64 = dataUrl.split(',')[1] || ''

    const response = await fetch(`http://${window.location.hostname}:4100/api/recordings`, {
      method: 'POST',
      headers: { 'Content-Type': 'application/json' },
      body: JSON.stringify({
        audio: base64,
        transcription: transcript.value.trim(),
        microphone: currentMicName.value,
        duration_ms
      })
    })

    if (!response.ok) {
      console.error('[Voice] Failed to save recording:', `HTTP ${response.status}`)
      return
    }

    const data = await response.json()
    if (data.success) {
      console.log(`[Voice] Recording saved: ${data.filename} (${(data.size / 1024).toFixed(1)} KB)`)
    } else {
      console.error('[Voice] Failed to save recording:', data.error)
    }
  } catch (e) {
    console.error('[Voice] Error saving recording:', e)
  }
}

// Label of the selected microphone: 'Default' when nothing is selected,
// 'Microphone' when the selected device is unknown or has no label.
const currentMicName = computed(() => {
  const wantedId = selectedDeviceId.value
  if (!wantedId) return 'Default'

  const match = audioDevices.value.find(({ deviceId }) => deviceId === wantedId)
  return match?.label || 'Microphone'
})

// Fills `audioDevices` with the available audio inputs and picks the first one
// when no device has been selected yet. Failures are logged, not thrown.
async function loadAudioDevices() {
  try {
    // Open (and immediately release) a stream first so that device labels
    // become available to enumerateDevices().
    const probe = await navigator.mediaDevices.getUserMedia({ audio: true })
    for (const track of probe.getTracks()) {
      track.stop()
    }

    const allDevices = await navigator.mediaDevices.enumerateDevices()
    const inputs = allDevices.filter(device => device.kind === 'audioinput')
    audioDevices.value = inputs

    const nothingSelected = !selectedDeviceId.value
    if (nothingSelected && inputs.length > 0) {
      selectedDeviceId.value = inputs[0]?.deviceId || ''
    }
  } catch (e) {
    console.error('[Voice] Failed to enumerate devices:', e)
  }
}

// Switches the active microphone and closes the selector dropdown.
function selectMicrophone(deviceId: string) {
  showMicSelector.value = false
  selectedDeviceId.value = deviceId

  if (!isRecording.value) return

  // An ongoing recording is restarted shortly after so it picks up the new device.
  stopRecording()
  setTimeout(() => startRecording(), 100)
}

// Document click handler: hides the microphone dropdown when the click landed
// outside both the mic bar and the dropdown itself.
function closeMicSelector(e: MouseEvent) {
  const clicked = e.target as HTMLElement
  const insideMicUi = clicked.closest('.mic-bar') || clicked.closest('.mic-dropdown')
  if (insideMicUi) return

  showMicSelector.value = false
}

// Text shown in the panel: final + interim transcript while results are
// streaming in, otherwise the final transcript or a usage hint.
const displayText = computed(() => {
  const interim = interimTranscript.value
  if (!interim) {
    return transcript.value || 'Presiona el micrófono o mantén Ctrl+Space...'
  }
  return `${transcript.value} ${interim}`
})

// Inline style for the floating window: docked bottom-left until the user
// drags it, then pinned to the dragged top/left coordinates.
const containerStyle = computed(() =>
  hasCustomPosition.value
    ? {
        top: `${position.value.y}px`,
        left: `${position.value.x}px`,
        bottom: 'auto',
        right: 'auto'
      }
    : { bottom: '80px', left: '16px' }
)

// Builds a Web Speech API recognizer wired to the component state.
// Returns null (and sets `error`) when the browser has no implementation.
function initRecognition() {
  const win = window as any
  const RecognitionCtor = win.SpeechRecognition || win.webkitSpeechRecognition

  if (!RecognitionCtor) {
    error.value = 'Speech recognition not supported in this browser'
    return null
  }

  const rec = new RecognitionCtor()
  rec.lang = 'es-419' // Latin American Spanish (better for accents)
  rec.continuous = true
  rec.interimResults = true

  rec.onresult = (event: SpeechRecognitionEvent) => {
    const finalized: string[] = []
    const pending: string[] = []

    // Only results from resultIndex onward are new or changed in this event.
    for (let idx = event.resultIndex; idx < event.results.length; idx++) {
      const result = event.results[idx]
      if (result.isFinal) {
        finalized.push(result[0].transcript + ' ')
      } else {
        pending.push(result[0].transcript)
      }
    }

    const finalText = finalized.join('')
    if (finalText) {
      transcript.value += finalText
    }
    interimTranscript.value = pending.join('')
  }

  rec.onerror = (event: SpeechRecognitionErrorEvent) => {
    console.error('[Voice] Recognition error:', event.error)
    error.value = event.error === 'not-allowed'
      ? 'Microphone access denied'
      : `Error: ${event.error}`
    isRecording.value = false
  }

  rec.onend = () => {
    // The browser ends recognition after silence; resume while the user is
    // still recording in Web Speech mode.
    const shouldResume = isRecording.value && !useWhisper.value
    if (shouldResume) {
      rec.start()
    }
  }

  return rec
}

// ============ WHISPER FUNCTIONS ============

// Fetches the Whisper status from the backend and mirrors it into
// `useWhisper` / `whisperReady` (and `whisperLoading` unless updateLoading is false).
// Returns the parsed status payload, or null when the request fails; in that
// case Whisper is treated as disabled.
async function checkWhisperStatus(updateLoading = true) {
  try {
    const response = await fetch(`http://${window.location.hostname}:4100/api/whisper/status`)
    const payload = await response.json()
    const { enabled, running, starting } = payload

    useWhisper.value = enabled
    whisperReady.value = running
    if (updateLoading) {
      whisperLoading.value = starting || false
    }
    return payload
  } catch {
    whisperReady.value = false
    useWhisper.value = false
    if (updateLoading) {
      whisperLoading.value = false
    }
    return null
  }
}

// Asks the backend to toggle Whisper mode and syncs local state with the answer.
// When the server reports it is still starting, polling takes over until it
// is ready, has failed, or times out.
async function toggleWhisperMode() {
  // Re-entrancy guard: ignore clicks while a toggle is still in flight.
  if (whisperLoading.value) {
    console.log('[Voice] Toggle already in progress, ignoring')
    return
  }

  whisperLoading.value = true
  error.value = ''

  // Switching Whisper on can take a while, so give immediate feedback.
  const switchingOn = !useWhisper.value
  if (switchingOn) {
    canvasStore.showNotification('Starting Whisper GPU server...', 'info', 10000)
  }

  try {
    const response = await fetch(`http://${window.location.hostname}:4100/api/whisper/toggle`, {
      method: 'POST'
    })
    const result = await response.json()

    if (result.starting) {
      // Server is booting: wait for the poller to report the outcome.
      console.log('[Voice] Server starting, polling for status...')
      await pollWhisperStatus()
      return
    }

    useWhisper.value = result.enabled
    whisperReady.value = result.running

    if (!result.enabled) {
      canvasStore.showNotification('Using Web Speech API', 'info')
      disconnectWhisperSocket()
    } else {
      canvasStore.showNotification('Whisper GPU ready!', 'success')
      connectWhisperSocket()
    }
  } catch (e: any) {
    error.value = 'Failed to toggle Whisper'
    canvasStore.showNotification('Error starting Whisper server', 'error')
    console.error('[Voice] Whisper toggle error:', e)
  } finally {
    whisperLoading.value = false
  }
}

// Polls the Whisper status every 2 seconds (60 attempts, i.e. 2 minutes at
// most) until the server is ready or has failed, then clears `whisperLoading`.
async function pollWhisperStatus() {
  const maxAttempts = 60
  const pollDelayMs = 2000

  for (let attempt = 1; attempt <= maxAttempts; attempt++) {
    await new Promise(resolve => setTimeout(resolve, pollDelayMs))

    try {
      // false: the poller owns the loading flag, the status check must not touch it
      const status = await checkWhisperStatus(false)

      if (!status) {
        console.log('[Voice] Failed to get status')
        continue
      }

      if (status.starting) {
        console.log(`[Voice] Still starting... (${attempt * 2}s)`)
        continue
      }

      // No longer starting: the server either came up or gave up.
      const cameUp = status.running && status.enabled
      if (cameUp) {
        console.log('[Voice] Server ready!')
        canvasStore.showNotification('Whisper GPU ready!', 'success')
        connectWhisperSocket()
      } else {
        console.log('[Voice] Server failed to start')
        canvasStore.showNotification('Whisper server failed to start', 'error')
      }
      whisperLoading.value = false
      return
    } catch (e) {
      console.error('[Voice] Polling error:', e)
    }
  }

  // All attempts used up without a definitive answer.
  canvasStore.showNotification('Whisper server timeout', 'error')
  whisperLoading.value = false
}

// Opens the WebSocket to the Whisper server (no-op when one is already open
// or still connecting) and installs the handlers that turn server messages
// into transcript updates.
//
// Message handling:
//  - 'ready'         -> marks Whisper as ready.
//  - 'transcription' -> on success with text, replaces the transcript with the
//                       full text sent by the server; on error, surfaces it.
//                       A final (non-partial) response also resolves a pending
//                       push-to-talk send, even when the text is empty, so the
//                       panel never stays stuck waiting.
function connectWhisperSocket() {
  // Also bail out while CONNECTING, otherwise rapid calls leak duplicate sockets.
  const state = whisperSocket?.readyState
  if (state === WebSocket.OPEN || state === WebSocket.CONNECTING) return

  console.log('[Voice] Connecting to Whisper server...')
  const ws = new WebSocket(WHISPER_WS_URL)
  whisperSocket = ws

  ws.onopen = () => {
    console.log('[Voice] Whisper WebSocket connected')
    whisperReady.value = true
  }

  ws.onmessage = (event) => {
    try {
      const msg = JSON.parse(event.data)

      if (msg.type === 'ready') {
        console.log('[Voice] Whisper ready:', msg.model, msg.device)
        whisperReady.value = true
        return
      }
      if (msg.type !== 'transcription') return

      const fullText = typeof msg.text === 'string' ? msg.text.trim() : ''
      const isFinal = !msg.partial
      let failed = false

      if (msg.success && fullText) {
        // Partial and final results both carry the full accumulated text.
        transcript.value = fullText + ' '
        interimTranscript.value = ''
        lastTranscriptLength = fullText.length
        if (isFinal) {
          console.log(`[Voice] 🎯 WHISPER-GPU (${msg.model}/${msg.device}):`, fullText)
        } else {
          console.log(`[Voice] 🔄 WHISPER partial:`, fullText)
        }
      } else if (msg.error) {
        failed = true
        error.value = msg.error
        console.error('[Voice] Whisper error:', msg.error)
      }

      // Only a final response settles a push-to-talk session that is waiting.
      if (!isFinal || !pendingWhisperSend) return
      pendingWhisperSend = false

      if (failed) {
        // Keep the panel open so the error stays visible.
        isPushToTalk.value = false
        return
      }

      console.log('[Voice] Whisper response received, sending transcript')
      if (transcript.value.trim()) {
        sendTranscriptAndClose()
      } else {
        // Nothing was recognized (e.g. silence): just close.
        isPushToTalk.value = false
        close()
      }
    } catch (e) {
      console.error('[Voice] Whisper message error:', e)
    }
  }

  ws.onclose = () => {
    console.log('[Voice] Whisper WebSocket closed')
    // Ignore events from a socket that has already been replaced or dropped.
    if (whisperSocket !== ws) return
    whisperReady.value = false
  }

  ws.onerror = (e) => {
    console.error('[Voice] Whisper WebSocket error:', e)
    if (whisperSocket !== ws) return
    whisperReady.value = false
  }
}

// Closes the Whisper WebSocket (if any) and marks Whisper as not ready.
function disconnectWhisperSocket() {
  whisperSocket?.close()
  whisperSocket = null
  whisperReady.value = false
}

// Starts capturing microphone audio for Whisper: opens the selected input
// device, records opus/webm in 100 ms slices and, every CHUNK_INTERVAL_MS,
// sends the audio accumulated so far for a partial transcription.
// On failure the error is surfaced in `error` and the microphone is released.
async function startWhisperRecording() {
  // Tear down leftovers from a session that was never stopped, so the old
  // interval and stream are not orphaned when they are reassigned below.
  if (chunkInterval) {
    clearInterval(chunkInterval)
    chunkInterval = null
  }
  if (mediaStream) {
    mediaStream.getTracks().forEach(track => track.stop())
    mediaStream = null
  }

  try {
    const audioConstraints: MediaTrackConstraints = selectedDeviceId.value
      ? { deviceId: { exact: selectedDeviceId.value } }
      : {}
    mediaStream = await navigator.mediaDevices.getUserMedia({ audio: audioConstraints })

    mediaRecorder = new MediaRecorder(mediaStream, {
      mimeType: 'audio/webm;codecs=opus'
    })

    // Reset state for new recording
    audioChunks = []
    lastTranscriptLength = 0

    mediaRecorder.ondataavailable = (event) => {
      if (event.data.size > 0) {
        audioChunks.push(event.data)
      }
    }

    // Start recording
    mediaRecorder.start(100) // Collect data every 100ms
    isRecording.value = true
    recordingStartTime = Date.now()

    // Send chunks periodically for progressive transcription
    chunkInterval = window.setInterval(() => {
      if (audioChunks.length > 0 && whisperSocket?.readyState === WebSocket.OPEN) {
        sendAudioChunk(false) // false = partial, don't clear
      }
    }, CHUNK_INTERVAL_MS)

  } catch (e: unknown) {
    const message = e instanceof Error ? e.message : String(e)
    error.value = `Microphone error: ${message}`
    console.error('[Voice] Microphone error:', e)

    // getUserMedia may have succeeded before the recorder failed (e.g. an
    // unsupported mimeType); release the device so the mic does not stay live.
    if (mediaStream) {
      mediaStream.getTracks().forEach(track => track.stop())
      mediaStream = null
    }
    mediaRecorder = null
  }
}

// Sends all audio accumulated so far to Whisper for transcription.
// isFinal = true marks the end of the recording: the chunk buffer is cleared
// and the audio is also kept for playback/upload.
function sendAudioChunk(isFinal: boolean) {
  const chunkCount = audioChunks.length
  if (chunkCount === 0) return

  // The whole recording is sent every time (webm needs header from first chunk).
  const payload = new Blob(audioChunks, { type: 'audio/webm' })

  // Below 5KB there is hardly more than the WebM header (~1-2KB): not worth sending.
  const tooSmall = payload.size < 5000
  if (tooSmall) {
    console.log(`[Voice] Skipping small chunk (${payload.size} bytes)`)
    if (isFinal) {
      audioChunks = []
    }
    return
  }

  if (isFinal) {
    // End of recording: reset the buffers and keep the audio for debugging.
    audioChunks = []
    lastTranscriptLength = 0
    saveAudioForPlayback(payload)
  }

  const encoder = new FileReader()
  encoder.onloadend = () => {
    // Strip the "data:...;base64," prefix from the data URL.
    const base64 = (encoder.result as string).split(',')[1]

    if (!whisperSocket || whisperSocket.readyState !== WebSocket.OPEN) return

    whisperSocket.send(JSON.stringify({
      type: 'transcribe',
      audio: base64,
      language: 'es',
      partial: !isFinal
    }))
    console.log(`[Voice] Sent ${isFinal ? 'FINAL' : 'partial'} audio (${chunkCount} chunks, ${payload.size} bytes)`)
  }
  encoder.readAsDataURL(payload)
}

// Ends a Whisper recording session: stops the periodic partial sends, submits
// the accumulated audio as the final chunk, then stops the recorder and
// releases the microphone. Safe to call when nothing is being recorded.
function stopWhisperRecording() {
  // Clear the chunk interval
  if (chunkInterval) {
    clearInterval(chunkInterval)
    chunkInterval = null
  }

  // Send final chunk
  // NOTE(review): this runs before mediaRecorder.stop(), so audio that the
  // recorder only delivers on stop is presumably not part of the final chunk —
  // confirm whether that is intended.
  if (audioChunks.length > 0) {
    sendAudioChunk(true) // true = final
  }

  // Stop recorder
  if (mediaRecorder && mediaRecorder.state !== 'inactive') {
    mediaRecorder.stop()
  }

  // Stop media stream
  if (mediaStream) {
    mediaStream.getTracks().forEach(track => track.stop())
    mediaStream = null
  }

  isRecording.value = false
}

// Starts recording when idle, stops it when already recording.
function toggleRecording() {
  if (!isRecording.value) {
    startRecording()
    return
  }
  stopRecording()
}

// Starts a recording with Whisper when it is enabled and ready, otherwise
// with the browser's Web Speech API. Clears any previous error first.
function startRecording() {
  error.value = ''

  const whisperUsable = useWhisper.value && whisperReady.value
  if (whisperUsable) {
    // Whisper GPU mode
    startWhisperRecording()
    return
  }

  // Web Speech API mode: create the recognizer lazily if needed.
  if (recognition === null) {
    recognition = initRecognition()
  }
  if (recognition === null) return

  try {
    recognition.start()
    isRecording.value = true
  } catch (e) {
    console.error('[Voice] Failed to start:', e)
  }
}

// Stops whatever capture is running and clears the interim transcript.
//
// Both backends are always shut down. startRecording() falls back to the Web
// Speech API when Whisper is enabled but not ready, and the mode can be
// toggled mid-recording, so `useWhisper` alone does not tell which backend
// actually holds the microphone.
function stopRecording() {
  // No-op when no Whisper capture is active; otherwise sends the final chunk
  // and releases the media stream.
  stopWhisperRecording()

  if (recognition) {
    try {
      recognition.stop()
    } catch (e) {
      console.error('[Voice] Failed to stop recognition:', e)
    }
  }

  isRecording.value = false
  interimTranscript.value = ''
}

// Resets all transcript text and cancels a typing animation in progress.
function clearTranscript() {
  if (typingTimeout) {
    clearTimeout(typingTimeout)
    typingTimeout = null
  }
  lastAnimatedLength = 0
  animatedTranscript.value = ''
  interimTranscript.value = ''
  transcript.value = ''
}

// Opens the WebSocket to the terminal (no-op when one is already open or
// still connecting) and keeps `connected` in sync with it.
function connectSocket() {
  console.log('[Voice] connectSocket called, current socket:', socket?.readyState)
  if (socket && (socket.readyState === WebSocket.OPEN || socket.readyState === WebSocket.CONNECTING)) return

  // Handlers compare against this instance: close events are delivered
  // asynchronously, so the handler of a socket that was closed earlier can run
  // after a newer socket has been created and must not reset the shared state.
  const ws = new WebSocket(WS_URL)
  socket = ws

  ws.onopen = () => {
    if (socket !== ws) return
    console.log('[Voice] WebSocket connected, readyState:', ws.readyState)
    connected.value = true
  }

  ws.onclose = () => {
    console.log('[Voice] WebSocket closed')
    if (socket !== ws) return
    connected.value = false
    socket = null
  }

  ws.onerror = (e) => {
    console.error('[Voice] WebSocket error:', e)
    if (socket !== ws) return
    connected.value = false
  }
}

// Closes the terminal WebSocket, if there is one, and marks it disconnected.
function disconnectSocket() {
  if (!socket) return

  socket.close()
  socket = null
  connected.value = false
}

// Types the transcript into the terminal over the WebSocket, one character
// every 15 ms, followed by Enter. On completion the transcript is cleared and
// the panel closed. Does nothing when the transcript is empty; notifies the
// user when the terminal is not connected or the connection drops mid-send.
function sendTranscript() {
  const text = transcript.value.trim()
  if (!text) return

  if (!socket || socket.readyState !== WebSocket.OPEN) {
    canvasStore.showNotification('Not connected to terminal', 'error')
    return
  }

  // Array.from iterates by code point, so characters outside the BMP are not
  // split into two lone surrogates the way split('') would.
  const chars = Array.from(text + '\r')
  let i = 0
  const typeChar = () => {
    if (i >= chars.length) {
      canvasStore.showNotification('Voice message sent', 'success')
      clearTranscript()
      close()
      return
    }

    if (!socket || socket.readyState !== WebSocket.OPEN) {
      // Connection dropped mid-message: keep the transcript so it can be resent.
      canvasStore.showNotification('Connection lost while sending', 'error')
      return
    }

    socket.send(JSON.stringify({ type: 'input', data: chars[i] }))
    i++
    setTimeout(typeChar, 15)
  }
  typeChar()
}

// Closes the panel: stops any recording, discards the transcript and sets
// `isOpen` to false (which emits `update:modelValue` to the parent).
function close() {
  stopRecording()
  clearTranscript()
  isOpen.value = false
}

// Drag handlers
// Begins dragging the window, unless the press started on a window control.
function startDrag(e: MouseEvent) {
  const pressed = e.target as HTMLElement
  if (pressed.closest('.window-controls')) return

  isDragging.value = true

  // Remember where inside the window the pointer grabbed it.
  const bounds = containerRef.value?.getBoundingClientRect()
  if (bounds) {
    dragOffset.value = {
      x: e.clientX - bounds.left,
      y: e.clientY - bounds.top
    }
  }

  document.addEventListener('mousemove', onDrag)
  document.addEventListener('mouseup', stopDrag)
}

// Moves the window with the pointer while dragging. The window may leave the
// viewport by up to 75% of its size on each side, so a quarter always stays reachable.
function onDrag(e: MouseEvent) {
  if (!isDragging.value) return

  const el = containerRef.value
  const width = el?.offsetWidth || 300
  const height = el?.offsetHeight || 200

  const clamp = (value: number, lower: number, upper: number) =>
    Math.max(lower, Math.min(value, upper))

  const { x: grabX, y: grabY } = dragOffset.value
  position.value = {
    x: clamp(e.clientX - grabX, -width * 0.75, window.innerWidth - width * 0.25),
    y: clamp(e.clientY - grabY, -height * 0.75, window.innerHeight - height * 0.25)
  }
  hasCustomPosition.value = true
}

// Ends a drag and detaches the document-level listeners added by startDrag.
function stopDrag() {
  document.removeEventListener('mouseup', stopDrag)
  document.removeEventListener('mousemove', onDrag)
  isDragging.value = false
}

// Keyboard shortcut handlers (Ctrl+Space)
// Ctrl+Space pressed: opens the panel and, if the key is still held after
// 150 ms, starts a push-to-talk recording.
function handleKeyDown(e: KeyboardEvent) {
  const isShortcut = e.ctrlKey && e.key === ' '
  if (!isShortcut) return

  // Keep the terminal and other handlers from seeing the shortcut.
  e.preventDefault()
  e.stopImmediatePropagation()

  // Key auto-repeat while holding: the press is already being tracked.
  if (keyDownTime > 0) return
  keyDownTime = Date.now()

  if (!isOpen.value) {
    isOpen.value = true
  }

  holdTimeout = window.setTimeout(() => {
    const stillHeld = keyDownTime > 0
    if (stillHeld && !isRecording.value) {
      isPushToTalk.value = true
      startRecording()
    }
  }, 150)
}

// How long a push-to-talk session waits for Whisper's final transcription
// before falling back to whatever transcript is available.
const WHISPER_RESPONSE_TIMEOUT_MS = 15000
// Pending fallback timer for the wait above (null when none is scheduled).
let whisperResponseTimeout: number | null = null

// Space released after Ctrl+Space: ends a push-to-talk session.
// Recording continues for 1.5 s to catch trailing words, then stops and the
// transcript is sent:
//  - Whisper mode: the send happens when the final transcription arrives
//    (handled in the socket's onmessage). If no response arrives in time
//    (final chunk too small to send, socket closed, Web Speech fallback in
//    use), a timeout sends the current transcript or closes the panel instead
//    of waiting forever.
//  - Web Speech mode: the send happens after 300 ms so final results can land.
function handleKeyUp(e: KeyboardEvent) {
  // Only react to Space release when Ctrl+Space was pressed
  if (e.key === ' ' && keyDownTime > 0) {
    e.preventDefault()
    e.stopImmediatePropagation() // Prevent terminal and other handlers from receiving
    console.log('[Voice] Space released, isPushToTalk:', isPushToTalk.value, 'isRecording:', isRecording.value)

    if (holdTimeout) {
      clearTimeout(holdTimeout)
      holdTimeout = null
    }

    // If was push-to-talk recording, continue recording for 1.5s buffer then stop
    if (isPushToTalk.value && isRecording.value) {
      console.log('[Voice] Key released, continuing recording for 1.5s buffer...')

      // A fallback left over from an earlier session must not fire into this one.
      if (whisperResponseTimeout) {
        clearTimeout(whisperResponseTimeout)
        whisperResponseTimeout = null
      }

      // Keep recording for 1.5s more (UX buffer for trailing words)
      setTimeout(() => {
        console.log('[Voice] Buffer complete, stopping recording')
        stopRecording()

        if (useWhisper.value) {
          // For Whisper: wait for server response (handled in onmessage)
          console.log('[Voice] Waiting for Whisper transcription...')
          pendingWhisperSend = true

          whisperResponseTimeout = window.setTimeout(() => {
            whisperResponseTimeout = null
            // Already settled by the server's final response.
            if (!pendingWhisperSend) return

            console.warn('[Voice] No Whisper response, falling back to current transcript')
            pendingWhisperSend = false
            if (transcript.value.trim()) {
              sendTranscriptAndClose()
            } else {
              isPushToTalk.value = false
              close()
            }
          }, WHISPER_RESPONSE_TIMEOUT_MS)
        } else {
          // For Web Speech API: send after short delay for final results
          setTimeout(() => {
            if (transcript.value.trim()) {
              sendTranscriptAndClose()
            } else {
              isPushToTalk.value = false
              close()
            }
          }, 300)
        }
      }, 1500)
    }

    keyDownTime = 0
  }
}

// Push-to-talk variant of sending: types the transcript into the terminal
// (one character every 15 ms, then Enter), then clears the transcript, leaves
// push-to-talk mode and closes the panel. With an empty transcript it just
// closes. When the terminal is not connected, or the connection drops
// mid-send, the user is notified and push-to-talk mode is left, but the panel
// stays open with the transcript intact.
function sendTranscriptAndClose() {
  console.log('[Voice] sendTranscriptAndClose called')
  const text = transcript.value.trim()
  if (!text) {
    console.log('[Voice] No text, closing')
    isPushToTalk.value = false
    close()
    return
  }

  console.log('[Voice] Text to send:', text)
  console.log('[Voice] Socket:', socket, 'readyState:', socket?.readyState)

  if (!socket || socket.readyState !== WebSocket.OPEN) {
    canvasStore.showNotification('Not connected to terminal', 'error')
    isPushToTalk.value = false
    return
  }

  // Array.from iterates by code point, so characters outside the BMP are not
  // split into two lone surrogates the way split('') would.
  const chars = Array.from(text + '\r')
  let i = 0
  const typeChar = () => {
    if (i >= chars.length) {
      canvasStore.showNotification('Voice message sent', 'success')
      clearTranscript()
      isPushToTalk.value = false
      close()
      return
    }

    if (!socket || socket.readyState !== WebSocket.OPEN) {
      // Connection dropped mid-message: do not leave the UI stuck in push-to-talk.
      canvasStore.showNotification('Connection lost while sending', 'error')
      isPushToTalk.value = false
      return
    }

    socket.send(JSON.stringify({ type: 'input', data: chars[i] }))
    i++
    setTimeout(typeChar, 15)
  }
  typeChar()
}

// Typewriter effect: reveals `targetText` in `animatedTranscript` one
// character at a time, continuing from the characters already shown.
function animateTyping(targetText: string) {
  // Only one animation may run at a time.
  if (typingTimeout) {
    clearTimeout(typingTimeout)
    typingTimeout = null
  }

  // Text got shorter (cleared or corrected): show it at once, no animation.
  const shrank = targetText.length < animatedTranscript.value.length
  if (shrank) {
    animatedTranscript.value = targetText
    lastAnimatedLength = targetText.length
    return
  }

  const reveal = (cursor: number) => {
    if (cursor > targetText.length) return

    animatedTranscript.value = targetText.substring(0, cursor)
    lastAnimatedLength = cursor

    if (cursor === targetText.length) return

    // 15-25ms per character
    const delay = 15 + Math.random() * 10
    typingTimeout = window.setTimeout(() => reveal(cursor + 1), delay)
  }

  // Resume where the previous animation stopped.
  reveal(lastAnimatedLength)
}

// Re-run the typing animation whenever the transcript changes.
watch(transcript, (latest) => {
  animateTyping(latest)
})

// Mount: prepare the recognizer, register the global Ctrl+Space shortcut,
// load the microphones and restore the Whisper connection state.
onMounted(async () => {
  recognition = initRecognition()

  // Capture phase, so the shortcut is seen before the terminal or other elements.
  const captureOptions = { capture: true }
  document.addEventListener('keydown', handleKeyDown, captureOptions)
  document.addEventListener('keyup', handleKeyUp, captureOptions)

  await loadAudioDevices()

  const status = await checkWhisperStatus()

  if (status?.starting) {
    // Page was reloaded while the server was booting: keep polling.
    console.log('[Voice] Server is starting, resuming polling...')
    pollWhisperStatus()
    return
  }

  if (useWhisper.value) {
    connectWhisperSocket()
  }
})

// Unmount: stop capture, close both sockets, cancel timers, release audio
// resources and detach every document-level listener this component added.
onBeforeUnmount(() => {
  stopRecording()
  recognition = null

  disconnectSocket()
  disconnectWhisperSocket()

  // Timers
  if (chunkInterval) clearInterval(chunkInterval)
  if (typingTimeout) clearTimeout(typingTimeout)
  if (holdTimeout) clearTimeout(holdTimeout)

  // Microphone
  if (mediaStream) {
    for (const track of mediaStream.getTracks()) {
      track.stop()
    }
  }

  // Debug playback
  if (audioElement) {
    audioElement.pause()
    audioElement = null
  }
  const playbackUrl = lastAudioUrl.value
  if (playbackUrl) {
    URL.revokeObjectURL(playbackUrl)
  }

  // Listeners
  const captureOptions = { capture: true }
  document.removeEventListener('keydown', handleKeyDown, captureOptions)
  document.removeEventListener('keyup', handleKeyUp, captureOptions)
  document.removeEventListener('mousemove', onDrag)
  document.removeEventListener('mouseup', stopDrag)
  document.removeEventListener('click', closeMicSelector)
})

// The terminal socket and the outside-click listener live only while the panel is open.
watch(isOpen, (open) => {
  if (!open) {
    disconnectSocket()
    showMicSelector.value = false
    document.removeEventListener('click', closeMicSelector)
    return
  }

  connectSocket()
  document.addEventListener('click', closeMicSelector)
})

// Public API reachable by a parent through a template ref
defineExpose({
  open() {
    isOpen.value = true
  },
  close,
  startRecording,
  stopRecording,
  getTranscript() {
    return transcript.value
  }
})
</script>

<template>
  <Teleport to="body">
    <Transition name="voice-slide">
      <div
        v-if="isOpen"
        ref="containerRef"
        class="voice-window"
        :class="{ dragging: isDragging }"
        :style="containerStyle"
      >
        <div class="glass">
          <!-- Titlebar -->
          <div class="titlebar" @mousedown="startDrag">
            <div class="left">
              <svg width="12" height="12" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2">
                <path d="M12 1a3 3 0 0 0-3 3v8a3 3 0 0 0 6 0V4a3 3 0 0 0-3-3z"/>
                <path d="M19 10v2a7 7 0 0 1-14 0v-2"/>
                <line x1="12" y1="19" x2="12" y2="23"/>
                <line x1="8" y1="23" x2="16" y2="23"/>
              </svg>
              <span>Voice</span>
              <i class="dot" :class="{ recording: isRecording, ptt: isPushToTalk }"></i>
              <span class="mode-badge" :class="{ gpu: useWhisper }">
                {{ useWhisper ? 'GPU' : 'Web' }}
              </span>
            </div>
            <div class="window-controls">
              <button
                class="whisper-toggle"
                :class="{ active: useWhisper, loading: whisperLoading }"
                :disabled="whisperLoading"
                @click.stop="toggleWhisperMode"
                :title="whisperLoading ? 'Starting Whisper server...' : (useWhisper ? 'Using Whisper GPU - Click to use Web Speech' : 'Using Web Speech - Click to use Whisper GPU')"
              >
                <svg width="10" height="10" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2">
                  <rect x="4" y="4" width="16" height="16" rx="2"/>
                  <line x1="9" y1="9" x2="9" y2="15"/>
                  <line x1="15" y1="9" x2="15" y2="15"/>
                </svg>
              </button>
              <button class="x" @click="close" title="Close">
                <svg width="8" height="8" viewBox="0 0 10 10">
                  <line x1="0" y1="0" x2="10" y2="10" stroke="currentColor" stroke-width="1.5"/>
                  <line x1="10" y1="0" x2="0" y2="10" stroke="currentColor" stroke-width="1.5"/>
                </svg>
              </button>
            </div>
          </div>

          <!-- Microphone Info Bar -->
          <div
            class="mic-bar"
            :class="{ disabled: !useWhisper }"
            @click.stop="useWhisper ? (showMicSelector = !showMicSelector) : null"
            :title="useWhisper ? 'Click to change microphone' : 'Microphone selection only available with Whisper GPU'"
          >
            <svg width="10" height="10" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2">
              <path d="M12 1a3 3 0 0 0-3 3v8a3 3 0 0 0 6 0V4a3 3 0 0 0-3-3z"/>
            </svg>
            <span class="mic-name">{{ useWhisper ? currentMicName : 'System Default' }}</span>
            <template v-if="useWhisper">
              <svg class="chevron" :class="{ open: showMicSelector }" width="8" height="8" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="3">
                <polyline points="6 9 12 15 18 9"/>
              </svg>
            </template>
            <span v-else class="mic-note">(Web API)</span>
          </div>

          <!-- Microphone Selector Dropdown -->
          <Transition name="dropdown">
            <div v-if="showMicSelector" class="mic-dropdown">
              <!--
                The v-for index supplies the fallback label number and the
                fallback key, so no per-row indexOf lookup is needed. The key
                falls back to the index because deviceId can be an empty
                string for every device, which would yield duplicate keys.
              -->
              <div
                v-for="(device, index) in audioDevices"
                :key="device.deviceId || `mic-${index}`"
                class="mic-option"
                :class="{ active: device.deviceId === selectedDeviceId }"
                @click.stop="selectMicrophone(device.deviceId)"
              >
                <svg v-if="device.deviceId === selectedDeviceId" width="10" height="10" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="3">
                  <polyline points="20 6 9 17 4 12"/>
                </svg>
                <span>{{ device.label || `Microphone ${index + 1}` }}</span>
              </div>
              <div v-if="audioDevices.length === 0" class="mic-option disabled">
                No microphones found
              </div>
            </div>
          </Transition>

          <!-- Content -->
          <div class="content">
            <div class="transcript" :class="{ empty: !animatedTranscript && !interimTranscript }">
              <span class="final">{{ animatedTranscript }}</span><span class="cursor" v-if="animatedTranscript && animatedTranscript.length < transcript.length">|</span>
              <span class="interim">{{ interimTranscript }}</span>
              <span v-if="!animatedTranscript && !interimTranscript" class="placeholder">
                Presiona el micrófono o mantén Ctrl+Space...
              </span>
            </div>

            <div v-if="error" class="error">{{ error }}</div>
          </div>

          <!-- Controls -->
          <div class="controls">
            <button
              class="rec-btn"
              :class="{ active: isRecording }"
              @click="toggleRecording"
              :title="isRecording ? 'Stop' : 'Record'"
            >
              <svg v-if="!isRecording" width="16" height="16" viewBox="0 0 24 24" fill="currentColor">
                <path d="M12 1a3 3 0 0 0-3 3v8a3 3 0 0 0 6 0V4a3 3 0 0 0-3-3z"/>
                <path d="M19 10v2a7 7 0 0 1-14 0v-2" fill="none" stroke="currentColor" stroke-width="2"/>
                <line x1="12" y1="19" x2="12" y2="23" stroke="currentColor" stroke-width="2"/>
                <line x1="8" y1="23" x2="16" y2="23" stroke="currentColor" stroke-width="2"/>
              </svg>
              <svg v-else width="16" height="16" viewBox="0 0 24 24" fill="currentColor">
                <rect x="6" y="6" width="12" height="12" rx="2"/>
              </svg>
            </button>

            <button class="action-btn" @click="clearTranscript" title="Clear" :disabled="!transcript">
              <svg width="14" height="14" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2">
                <polyline points="3 6 5 6 21 6"/>
                <path d="M19 6v14a2 2 0 0 1-2 2H7a2 2 0 0 1-2-2V6m3 0V4a2 2 0 0 1 2-2h4a2 2 0 0 1 2 2v2"/>
              </svg>
            </button>

            <button
              class="action-btn play"
              :class="{ playing: isPlayingAudio }"
              @click="playLastAudio"
              title="Play last audio"
              :disabled="!lastAudioUrl"
            >
              <svg v-if="!isPlayingAudio" width="14" height="14" viewBox="0 0 24 24" fill="currentColor">
                <polygon points="5 3 19 12 5 21 5 3"/>
              </svg>
              <svg v-else width="14" height="14" viewBox="0 0 24 24" fill="currentColor">
                <rect x="6" y="4" width="4" height="16"/>
                <rect x="14" y="4" width="4" height="16"/>
              </svg>
            </button>

            <button
              class="action-btn send"
              @click="sendTranscript"
              title="Send to Claude"
              :disabled="!transcript.trim()"
            >
              <svg width="14" height="14" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2">
                <line x1="22" y1="2" x2="11" y2="13"/>
                <polygon points="22 2 15 22 11 13 2 9 22 2"/>
              </svg>
              <span>Send</span>
            </button>
          </div>
        </div>
      </div>
    </Transition>
  </Teleport>
</template>

<style scoped>
.voice-window {
  position: fixed;
  width: 320px;
  z-index: 9998;
}

.voice-window.dragging {
  user-select: none;
}

.glass {
  position: relative;
  display: flex;
  flex-direction: column;
  background: rgba(200, 215, 235, 0.35);
  backdrop-filter: blur(24px) saturate(1.6);
  -webkit-backdrop-filter: blur(24px) saturate(1.6);
  border-radius: 8px;
  border: 1px solid rgba(255, 255, 255, 0.6);
  box-shadow:
    0 0 0 1px rgba(80, 120, 180, 0.25),
    0 6px 24px rgba(0, 0, 0, 0.25),
    inset 0 1px 0 rgba(255, 255, 255, 0.6);
  overflow: visible;
}

.titlebar {
  height: 26px;
  padding: 0 6px;
  background: rgba(255, 255, 255, 0.25);
  border-bottom: 1px solid rgba(255, 255, 255, 0.3);
  display: flex;
  align-items: center;
  justify-content: space-between;
  cursor: grab;
  user-select: none;
}

.titlebar:active {
  cursor: grabbing;
}

.left {
  display: flex;
  align-items: center;
  gap: 6px;
  color: #222;
  font: 500 11px/1 system-ui, sans-serif;
}

.dot {
  width: 6px;
  height: 6px;
  border-radius: 50%;
  background: #999;
}

.dot.recording {
  background: #e33;
  box-shadow: 0 0 6px #e33;
  animation: pulse 0.8s infinite;
}

.dot.ptt {
  background: #f90;
  box-shadow: 0 0 6px #f90;
}

.mode-badge {
  font-size: 8px;
  padding: 1px 4px;
  border-radius: 3px;
  background: rgba(0, 0, 0, 0.2);
  color: #555;
  font-weight: 600;
  text-transform: uppercase;
}

.mode-badge.gpu {
  background: linear-gradient(135deg, #10b981, #059669);
  color: #fff;
  box-shadow: 0 0 4px rgba(16, 185, 129, 0.5);
}

.whisper-toggle {
  width: 20px;
  height: 18px;
  display: flex;
  align-items: center;
  justify-content: center;
  background: rgba(255, 255, 255, 0.3);
  border: 1px solid rgba(0, 0, 0, 0.1);
  border-radius: 3px;
  color: #666;
  cursor: pointer;
  transition: all 0.15s;
}

.whisper-toggle:hover:not(:disabled) {
  background: rgba(255, 255, 255, 0.5);
}

.whisper-toggle:disabled {
  cursor: not-allowed;
  opacity: 0.6;
}

/*
 * State colours for the Whisper toggle. Each rule also lists a :hover
 * selector scoped under .window-controls: the generic hover rules
 * (.whisper-toggle:hover:not(:disabled) and .window-controls button:hover)
 * are more specific than a bare two-class selector and would otherwise
 * replace the state background with translucent white while the icon stays
 * white.
 */
.whisper-toggle.active,
.window-controls .whisper-toggle.active:hover {
  background: linear-gradient(180deg, #10b981 0%, #059669 100%);
  border-color: #047857;
  color: #fff;
}

/* Declared after .active so the loading look wins when both classes are set. */
.whisper-toggle.loading,
.window-controls .whisper-toggle.loading:hover {
  animation: pulse 0.6s infinite;
  background: linear-gradient(180deg, #f59e0b 0%, #d97706 100%);
  border-color: #b45309;
  color: #fff;
}

@keyframes pulse {
  0%, 100% { opacity: 1; }
  50% { opacity: 0.5; }
}

.window-controls {
  display: flex;
  gap: 2px;
}

.window-controls button {
  width: 20px;
  height: 18px;
  display: flex;
  align-items: center;
  justify-content: center;
  background: rgba(255, 255, 255, 0.3);
  border: 1px solid rgba(0, 0, 0, 0.1);
  border-radius: 3px;
  color: #333;
  cursor: pointer;
}

.window-controls button:hover {
  background: rgba(255, 255, 255, 0.5);
}

.window-controls button.x:hover {
  background: linear-gradient(180deg, #e66 0%, #c33 100%);
  border-color: #a22;
  color: #fff;
}

/* Microphone Bar */
.mic-bar {
  display: flex;
  align-items: center;
  gap: 6px;
  padding: 6px 10px;
  background: rgba(0, 0, 0, 0.05);
  border-bottom: 1px solid rgba(255, 255, 255, 0.2);
  cursor: pointer;
  font-size: 10px;
  color: #444;
  transition: background 0.15s;
}

.mic-bar:hover:not(.disabled) {
  background: rgba(0, 0, 0, 0.1);
}

.mic-bar.disabled {
  cursor: default;
  opacity: 0.7;
}

.mic-note {
  font-size: 9px;
  color: #888;
  font-style: italic;
}

.mic-name {
  flex: 1;
  overflow: hidden;
  text-overflow: ellipsis;
  white-space: nowrap;
  font-weight: 500;
}

.mic-bar .chevron {
  transition: transform 0.2s;
}

.mic-bar .chevron.open {
  transform: rotate(180deg);
}

/* Microphone Dropdown */
/*
 * Device list overlay, absolutely positioned inside .glass.
 * The 52px top offset is a fixed value and must be adjusted by hand if the
 * height of the rows above it (titlebar, mic bar) changes.
 */
.mic-dropdown {
  position: absolute;
  top: 52px;
  left: 0;
  right: 0;
  background: rgba(255, 255, 255, 0.95);
  backdrop-filter: blur(12px);
  /* Prefixed form kept alongside the standard one, matching .glass */
  -webkit-backdrop-filter: blur(12px);
  border: 1px solid rgba(0, 0, 0, 0.1);
  border-radius: 0 0 6px 6px;
  box-shadow: 0 4px 12px rgba(0, 0, 0, 0.15);
  z-index: 10;
  max-height: 200px;
  overflow-y: auto;
}

.mic-option {
  display: flex;
  align-items: center;
  gap: 8px;
  padding: 8px 12px;
  font-size: 11px;
  color: #333;
  cursor: pointer;
  transition: background 0.1s;
}

.mic-option:hover:not(.disabled) {
  background: rgba(74, 153, 153, 0.1);
}

.mic-option.active {
  background: rgba(74, 153, 153, 0.15);
  color: #2a7;
  font-weight: 500;
}

.mic-option.disabled {
  color: #999;
  cursor: default;
  font-style: italic;
}

.mic-option svg {
  flex-shrink: 0;
  color: #2a7;
}

.mic-option span {
  overflow: hidden;
  text-overflow: ellipsis;
  white-space: nowrap;
}

/*
 * Staggered animation for mic options.
 * Options start transparent and slideIn (fill-mode: forwards) leaves them
 * visible. Each of the first five rows starts 20ms after the previous one.
 */
.mic-dropdown .mic-option {
  animation: slideIn 0.2s ease forwards;
  opacity: 0;
}

.mic-dropdown .mic-option:nth-child(1) { animation-delay: 0.02s; }
.mic-dropdown .mic-option:nth-child(2) { animation-delay: 0.04s; }
.mic-dropdown .mic-option:nth-child(3) { animation-delay: 0.06s; }
.mic-dropdown .mic-option:nth-child(4) { animation-delay: 0.08s; }
.mic-dropdown .mic-option:nth-child(5) { animation-delay: 0.10s; }
/* Rows past the fifth share one final delay so they never start before the first row. */
.mic-dropdown .mic-option:nth-child(n + 6) { animation-delay: 0.12s; }

@keyframes slideIn {
  from {
    opacity: 0;
    transform: translateX(-8px);
  }
  to {
    opacity: 1;
    transform: translateX(0);
  }
}

/* Dropdown transition */
.dropdown-enter-active {
  transition: all 0.2s cubic-bezier(0.34, 1.56, 0.64, 1);
}

.dropdown-leave-active {
  transition: all 0.15s cubic-bezier(0.4, 0, 1, 1);
}

.dropdown-enter-from {
  opacity: 0;
  transform: translateY(-10px) scaleY(0.8);
  transform-origin: top center;
}

.dropdown-leave-to {
  opacity: 0;
  transform: translateY(-6px) scaleY(0.9);
  transform-origin: top center;
}

.content {
  padding: 12px;
  min-height: 80px;
  max-height: 150px;
  overflow-y: auto;
}

.transcript {
  font-size: 13px;
  line-height: 1.5;
  color: #222;
}

.transcript.empty {
  color: #666;
  font-style: italic;
}

.transcript .interim {
  color: #666;
  font-style: italic;
}

.transcript .cursor {
  color: #4a9;
  font-weight: bold;
  animation: blink 0.6s infinite;
}

@keyframes blink {
  0%, 50% { opacity: 1; }
  51%, 100% { opacity: 0; }
}

.transcript .placeholder {
  color: #888;
}

.error {
  margin-top: 8px;
  padding: 6px 10px;
  background: rgba(220, 50, 50, 0.15);
  border-radius: 4px;
  color: #c33;
  font-size: 11px;
}

.controls {
  display: flex;
  align-items: center;
  gap: 8px;
  padding: 10px 12px;
  background: rgba(255, 255, 255, 0.15);
  border-top: 1px solid rgba(255, 255, 255, 0.3);
}

.rec-btn {
  width: 36px;
  height: 36px;
  border-radius: 50%;
  display: flex;
  align-items: center;
  justify-content: center;
  background: linear-gradient(180deg, #f5f5f5 0%, #ddd 100%);
  border: 1px solid rgba(0, 0, 0, 0.2);
  color: #333;
  cursor: pointer;
  transition: all 0.15s;
}

.rec-btn:hover {
  background: linear-gradient(180deg, #fff 0%, #eee 100%);
}

.rec-btn.active {
  background: linear-gradient(180deg, #e55 0%, #c33 100%);
  border-color: #a22;
  color: #fff;
  animation: pulse 1s infinite;
}

.action-btn {
  height: 28px;
  padding: 0 10px;
  display: flex;
  align-items: center;
  gap: 4px;
  background: rgba(255, 255, 255, 0.4);
  border: 1px solid rgba(0, 0, 0, 0.15);
  border-radius: 4px;
  color: #333;
  font-size: 11px;
  cursor: pointer;
  transition: all 0.15s;
}

.action-btn:hover:not(:disabled) {
  background: rgba(255, 255, 255, 0.6);
}

.action-btn:disabled {
  opacity: 0.4;
  cursor: not-allowed;
}

.action-btn.play {
  background: linear-gradient(180deg, #6b7280 0%, #4b5563 100%);
  border-color: #374151;
  color: #fff;
}

.action-btn.play:hover:not(:disabled) {
  background: linear-gradient(180deg, #9ca3af 0%, #6b7280 100%);
}

.action-btn.play.playing {
  background: linear-gradient(180deg, #f59e0b 0%, #d97706 100%);
  border-color: #b45309;
  animation: pulse 1s infinite;
}

.action-btn.send {
  margin-left: auto;
  background: linear-gradient(180deg, #4a9 0%, #3a8 100%);
  border-color: #2a7;
  color: #fff;
}

.action-btn.send:hover:not(:disabled) {
  background: linear-gradient(180deg, #5ba 0%, #4a9 100%);
}

/* Transitions */
.voice-slide-enter-active,
.voice-slide-leave-active {
  transition: all 0.2s ease;
}

.voice-slide-enter-from,
.voice-slide-leave-to {
  opacity: 0;
  transform: translateY(16px) scale(0.95);
}
</style>