Compare commits
5 Commits
e867b7873e
...
853aea6eb5
| Author | SHA1 | Date | |
|---|---|---|---|
| 853aea6eb5 | |||
| 5be0fb91ab | |||
| 9f1e10b8d5 | |||
| ac17a9f292 | |||
| 638e6ac8e0 |
@@ -50,7 +50,9 @@
|
||||
"mcp__agent-ui__localhost_4100-notificar",
|
||||
"mcp__agent-ui__localhost_4100-enviar_al_panel",
|
||||
"mcp__agent-ui__localhost_4100-render_html",
|
||||
"mcp__agent-ui__localhost_4100-load_vue_component"
|
||||
"mcp__agent-ui__localhost_4100-load_vue_component",
|
||||
"mcp__agent-ui__localhost_4100-page_refresh",
|
||||
"WebFetch(domain:docs.anthropic.com)"
|
||||
]
|
||||
},
|
||||
"enableAllProjectMcpServers": true,
|
||||
|
||||
2
.gitignore
vendored
2
.gitignore
vendored
@@ -3,3 +3,5 @@ frontend/node_modules/
|
||||
.env
|
||||
*.log
|
||||
dist/
|
||||
frontend/dev-dist/
|
||||
nul
|
||||
|
||||
@@ -1 +0,0 @@
|
||||
if('serviceWorker' in navigator) navigator.serviceWorker.register('/dev-sw.js?dev-sw', { scope: '/', type: 'classic' })
|
||||
@@ -1,94 +0,0 @@
|
||||
/**
|
||||
* Copyright 2018 Google Inc. All Rights Reserved.
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
// If the loader is already loaded, just stop.
|
||||
if (!self.define) {
|
||||
let registry = {};
|
||||
|
||||
// Used for `eval` and `importScripts` where we can't get script URL by other means.
|
||||
// In both cases, it's safe to use a global var because those functions are synchronous.
|
||||
let nextDefineUri;
|
||||
|
||||
const singleRequire = (uri, parentUri) => {
|
||||
uri = new URL(uri + ".js", parentUri).href;
|
||||
return registry[uri] || (
|
||||
|
||||
new Promise(resolve => {
|
||||
if ("document" in self) {
|
||||
const script = document.createElement("script");
|
||||
script.src = uri;
|
||||
script.onload = resolve;
|
||||
document.head.appendChild(script);
|
||||
} else {
|
||||
nextDefineUri = uri;
|
||||
importScripts(uri);
|
||||
resolve();
|
||||
}
|
||||
})
|
||||
|
||||
.then(() => {
|
||||
let promise = registry[uri];
|
||||
if (!promise) {
|
||||
throw new Error(`Module ${uri} didn’t register its module`);
|
||||
}
|
||||
return promise;
|
||||
})
|
||||
);
|
||||
};
|
||||
|
||||
self.define = (depsNames, factory) => {
|
||||
const uri = nextDefineUri || ("document" in self ? document.currentScript.src : "") || location.href;
|
||||
if (registry[uri]) {
|
||||
// Module is already loading or loaded.
|
||||
return;
|
||||
}
|
||||
let exports = {};
|
||||
const require = depUri => singleRequire(depUri, uri);
|
||||
const specialDeps = {
|
||||
module: { uri },
|
||||
exports,
|
||||
require
|
||||
};
|
||||
registry[uri] = Promise.all(depsNames.map(
|
||||
depName => specialDeps[depName] || require(depName)
|
||||
)).then(deps => {
|
||||
factory(...deps);
|
||||
return exports;
|
||||
});
|
||||
};
|
||||
}
|
||||
define(['./workbox-5a5d9309'], (function (workbox) { 'use strict';
|
||||
|
||||
self.skipWaiting();
|
||||
workbox.clientsClaim();
|
||||
|
||||
/**
|
||||
* The precacheAndRoute() method efficiently caches and responds to
|
||||
* requests for URLs in the manifest.
|
||||
* See https://goo.gl/S9QRab
|
||||
*/
|
||||
workbox.precacheAndRoute([{
|
||||
"url": "suppress-warnings.js",
|
||||
"revision": "d41d8cd98f00b204e9800998ecf8427e"
|
||||
}, {
|
||||
"url": "index.html",
|
||||
"revision": "0.24e3u5ntq78"
|
||||
}], {});
|
||||
workbox.cleanupOutdatedCaches();
|
||||
workbox.registerRoute(new workbox.NavigationRoute(workbox.createHandlerBoundToURL("index.html"), {
|
||||
allowlist: [/^\/$/],
|
||||
denylist: [/^\/api\//]
|
||||
}));
|
||||
|
||||
}));
|
||||
//# sourceMappingURL=sw.js.map
|
||||
File diff suppressed because one or more lines are too long
File diff suppressed because it is too large
Load Diff
File diff suppressed because one or more lines are too long
File diff suppressed because it is too large
Load Diff
File diff suppressed because one or more lines are too long
@@ -250,13 +250,38 @@ function initTerminal() {
|
||||
}
|
||||
})
|
||||
|
||||
// Capture Ctrl+E even when terminal has focus
|
||||
// Capture Ctrl+E and Ctrl+V when terminal has focus
|
||||
terminal.attachCustomKeyEventHandler((e) => {
|
||||
// Ctrl+E: Toggle terminal
|
||||
if (e.ctrlKey && e.key === 'e') {
|
||||
e.preventDefault()
|
||||
toggleTerminal()
|
||||
return false // Prevent terminal from processing
|
||||
return false
|
||||
}
|
||||
|
||||
// Ctrl+V: Paste from clipboard
|
||||
if (e.ctrlKey && e.key === 'v' && e.type === 'keydown') {
|
||||
e.preventDefault()
|
||||
navigator.clipboard.readText().then((text) => {
|
||||
if (text && socket && socket.readyState === WebSocket.OPEN) {
|
||||
socket.send(JSON.stringify({ type: 'input', data: text }))
|
||||
}
|
||||
}).catch((err) => {
|
||||
console.error('[Terminal] Clipboard read failed:', err)
|
||||
})
|
||||
return false
|
||||
}
|
||||
|
||||
// Ctrl+C: Copy selection (if any)
|
||||
if (e.ctrlKey && e.key === 'c' && e.type === 'keydown') {
|
||||
const selection = terminal?.getSelection()
|
||||
if (selection) {
|
||||
navigator.clipboard.writeText(selection).catch(console.error)
|
||||
return false
|
||||
}
|
||||
// If no selection, let Ctrl+C pass through as SIGINT
|
||||
}
|
||||
|
||||
return true // Let terminal handle other keys
|
||||
})
|
||||
}
|
||||
|
||||
@@ -23,6 +23,11 @@ const transcript = ref('')
|
||||
const interimTranscript = ref('')
|
||||
const error = ref('')
|
||||
|
||||
// Typing animation state
|
||||
const animatedTranscript = ref('')
|
||||
let typingTimeout: number | null = null
|
||||
let lastAnimatedLength = 0
|
||||
|
||||
// Position and drag state
|
||||
const position = ref({ x: 0, y: 0 })
|
||||
const hasCustomPosition = ref(false)
|
||||
@@ -30,18 +35,32 @@ const isDragging = ref(false)
|
||||
const dragOffset = ref({ x: 0, y: 0 })
|
||||
const containerRef = ref<HTMLElement | null>(null)
|
||||
|
||||
// Speech recognition
|
||||
// Speech recognition (Web Speech API)
|
||||
let recognition: SpeechRecognition | null = null
|
||||
|
||||
// WebSocket connection (own session)
|
||||
// WebSocket connection to terminal
|
||||
const WS_URL = `ws://${window.location.hostname}:4103`
|
||||
let socket: WebSocket | null = null
|
||||
const connected = ref(false)
|
||||
|
||||
// Push-to-talk state (Ctrl+S)
|
||||
// Push-to-talk state (Ctrl+Space)
|
||||
let keyDownTime = 0
|
||||
let holdTimeout: number | null = null
|
||||
const isPushToTalk = ref(false)
|
||||
let pendingWhisperSend = false // Flag to send transcript when Whisper responds
|
||||
|
||||
// ============ WHISPER MODE ============
|
||||
const useWhisper = ref(false)
|
||||
const whisperReady = ref(false)
|
||||
const whisperLoading = ref(false)
|
||||
const WHISPER_WS_URL = `ws://${window.location.hostname}:4104`
|
||||
let whisperSocket: WebSocket | null = null
|
||||
let mediaRecorder: MediaRecorder | null = null
|
||||
let audioChunks: Blob[] = []
|
||||
let lastTranscriptLength = 0 // Track length of last transcription to show only new text
|
||||
let chunkInterval: number | null = null
|
||||
const CHUNK_INTERVAL_MS = 3000 // Send audio every 3 seconds
|
||||
let mediaStream: MediaStream | null = null
|
||||
|
||||
const displayText = computed(() => {
|
||||
if (interimTranscript.value) {
|
||||
@@ -73,7 +92,7 @@ function initRecognition() {
|
||||
const rec = new SpeechRecognition()
|
||||
rec.continuous = true
|
||||
rec.interimResults = true
|
||||
rec.lang = 'es-ES'
|
||||
rec.lang = 'es-419' // Latin American Spanish (better for accents)
|
||||
|
||||
rec.onresult = (event: SpeechRecognitionEvent) => {
|
||||
let interim = ''
|
||||
@@ -105,7 +124,7 @@ function initRecognition() {
|
||||
}
|
||||
|
||||
rec.onend = () => {
|
||||
if (isRecording.value) {
|
||||
if (isRecording.value && !useWhisper.value) {
|
||||
// Restart if still recording (browser stops after silence)
|
||||
rec.start()
|
||||
}
|
||||
@@ -114,6 +133,307 @@ function initRecognition() {
|
||||
return rec
|
||||
}
|
||||
|
||||
// ============ WHISPER FUNCTIONS ============
|
||||
|
||||
/**
 * Fetch the Whisper server state from the API and mirror it into the local refs.
 *
 * @param updateLoading - When false, `whisperLoading` is left untouched so a
 *   caller that manages the loading flag itself (the polling loop) is not overridden.
 * @returns The parsed status payload, or `null` when the request failed.
 */
async function checkWhisperStatus(updateLoading = true) {
  try {
    const res = await fetch(`http://${window.location.hostname}:4100/api/whisper/status`)
    // An HTTP error body must not be interpreted as a status payload
    if (!res.ok) {
      throw new Error(`Whisper status request failed with HTTP ${res.status}`)
    }
    const data = await res.json()

    // Coerce so a missing field can never put `undefined` into a boolean ref
    useWhisper.value = Boolean(data.enabled)
    whisperReady.value = Boolean(data.running)
    if (updateLoading) {
      whisperLoading.value = Boolean(data.starting)
    }
    return data
  } catch (e) {
    // Fall back to "Whisper unavailable", but leave a trace of why
    console.warn('[Voice] Whisper status check failed:', e)
    useWhisper.value = false
    whisperReady.value = false
    if (updateLoading) {
      whisperLoading.value = false
    }
    return null
  }
}
|
||||
|
||||
/**
 * Ask the API to toggle Whisper mode and bring the UI state in line with the answer.
 *
 * While a toggle is in flight `whisperLoading` is true and further calls are ignored.
 * If the server answers that it is still starting, the function polls until the
 * server is ready, has failed, or the polling times out.
 */
async function toggleWhisperMode() {
  // Prevent multiple clicks
  if (whisperLoading.value) {
    console.log('[Voice] Toggle already in progress, ignoring')
    return
  }

  whisperLoading.value = true
  error.value = ''

  // Show immediate feedback when switching towards Whisper
  if (!useWhisper.value) {
    canvasStore.showNotification('Starting Whisper GPU server...', 'info', 10000)
  }

  try {
    const res = await fetch(`http://${window.location.hostname}:4100/api/whisper/toggle`, {
      method: 'POST'
    })
    // A failed request must not be read as "Whisper disabled"
    if (!res.ok) {
      throw new Error(`Whisper toggle request failed with HTTP ${res.status}`)
    }
    const data = await res.json()

    // Server is starting - poll until ready
    if (data.starting) {
      console.log('[Voice] Server starting, polling for status...')
      await pollWhisperStatus()
      return
    }

    useWhisper.value = data.enabled
    whisperReady.value = data.running

    if (data.enabled) {
      canvasStore.showNotification('Whisper GPU ready!', 'success')
      connectWhisperSocket()
    } else {
      canvasStore.showNotification('Using Web Speech API', 'info')
      disconnectWhisperSocket()
    }
  } catch (e: any) {
    error.value = 'Failed to toggle Whisper'
    canvasStore.showNotification('Error starting Whisper server', 'error')
    console.error('[Voice] Whisper toggle error:', e)
  } finally {
    // Runs on every exit path, including the early return after polling
    whisperLoading.value = false
  }
}
|
||||
|
||||
// Poll server status until ready or failed
|
||||
/**
 * Re-check the Whisper server status every 2 seconds until it leaves the
 * "starting" state or the attempt budget is exhausted.
 *
 * On success the Whisper socket is connected; on failure or timeout an error
 * notification is shown. `whisperLoading` is cleared on every terminal outcome.
 */
async function pollWhisperStatus() {
  const maxAttempts = 60 // 2 minutes max

  for (let attempt = 1; attempt <= maxAttempts; attempt += 1) {
    // Wait first: the caller has just been told that the server is starting
    await new Promise(resolve => setTimeout(resolve, 2000))

    try {
      // Pass false so the status check does not touch the loading flag
      const status = await checkWhisperStatus(false)

      if (!status) {
        console.log('[Voice] Failed to get status')
        continue
      }

      if (status.starting) {
        console.log(`[Voice] Still starting... (${attempt * 2}s)`)
        continue
      }

      // No longer starting: the server either came up or gave up
      if (status.running && status.enabled) {
        console.log('[Voice] Server ready!')
        canvasStore.showNotification('Whisper GPU ready!', 'success')
        connectWhisperSocket()
      } else {
        console.log('[Voice] Server failed to start')
        canvasStore.showNotification('Whisper server failed to start', 'error')
      }
      whisperLoading.value = false
      return
    } catch (e) {
      console.error('[Voice] Polling error:', e)
    }
  }

  // Attempt budget used up without a terminal status
  canvasStore.showNotification('Whisper server timeout', 'error')
  whisperLoading.value = false
}
|
||||
|
||||
/**
 * Open the WebSocket to the Whisper server and wire up its event handlers.
 *
 * Does nothing when a socket is already open or still connecting. Incoming
 * `transcription` messages replace the transcript; a final (non-partial) result
 * also resolves a push-to-talk release that was waiting for it.
 */
function connectWhisperSocket() {
  // A socket that is still connecting counts as "in use": opening a second one
  // would orphan the first while its handlers keep firing.
  const currentState = whisperSocket?.readyState
  if (currentState === WebSocket.OPEN || currentState === WebSocket.CONNECTING) return

  console.log('[Voice] Connecting to Whisper server...')
  const ws = new WebSocket(WHISPER_WS_URL)
  whisperSocket = ws

  // Lifecycle events from a socket that has been replaced or disconnected must
  // not overwrite the readiness of the socket that is current now.
  const isCurrent = () => whisperSocket === ws

  // Resolve a push-to-talk release that was waiting for the final transcription
  const flushPendingSend = () => {
    if (!pendingWhisperSend) return
    pendingWhisperSend = false
    console.log('[Voice] Whisper response received, sending transcript')
    if (transcript.value.trim()) {
      sendTranscriptAndClose()
    } else {
      isPushToTalk.value = false
      close()
    }
  }

  ws.onopen = () => {
    if (!isCurrent()) return
    console.log('[Voice] Whisper WebSocket connected')
    whisperReady.value = true
  }

  ws.onmessage = (event) => {
    try {
      const msg = JSON.parse(event.data)

      if (msg.type === 'ready') {
        console.log('[Voice] Whisper ready:', msg.model, msg.device)
        whisperReady.value = true
        return
      }
      if (msg.type !== 'transcription') return

      if (msg.success && msg.text) {
        const fullText = msg.text.trim()

        // Partial and final results both carry the full accumulated text
        transcript.value = fullText + ' '
        interimTranscript.value = ''

        if (msg.partial) {
          console.log(`[Voice] 🔄 WHISPER partial:`, fullText)
        } else {
          console.log(`[Voice] 🎯 WHISPER-GPU (${msg.model}/${msg.device}):`, fullText)
          // Auto-send if push-to-talk was waiting for this
          flushPendingSend()
        }

        // Update last transcript length for next partial
        lastTranscriptLength = fullText.length
      } else if (msg.error) {
        error.value = msg.error
        console.error('[Voice] Whisper error:', msg.error)
        // Clear pending send on error
        if (pendingWhisperSend) {
          pendingWhisperSend = false
          isPushToTalk.value = false
        }
      } else if (!msg.partial) {
        // Final result with no text and no error (presumably nothing was
        // recognised): still release a waiting push-to-talk send, otherwise
        // the pending flag would stay set indefinitely.
        flushPendingSend()
      }
    } catch (e) {
      console.error('[Voice] Whisper message error:', e)
    }
  }

  ws.onclose = () => {
    console.log('[Voice] Whisper WebSocket closed')
    if (isCurrent()) {
      whisperReady.value = false
    }
  }

  ws.onerror = (e) => {
    console.error('[Voice] Whisper WebSocket error:', e)
    if (isCurrent()) {
      whisperReady.value = false
    }
  }
}
|
||||
|
||||
/**
 * Close the Whisper WebSocket, if there is one, and mark Whisper as not ready.
 */
function disconnectWhisperSocket() {
  const active = whisperSocket
  if (active !== null) {
    active.close()
    whisperSocket = null
  }
  // Readiness is cleared whether or not a socket existed
  whisperReady.value = false
}
|
||||
|
||||
/**
 * Start capturing microphone audio for Whisper transcription.
 *
 * Audio is collected in 100 ms slices into `audioChunks`; every
 * `CHUNK_INTERVAL_MS` the accumulated audio is sent as a partial request.
 * If the microphone or recorder cannot be set up, the error is surfaced in
 * `error` and the newly acquired stream is released again.
 */
async function startWhisperRecording() {
  // Track the stream acquired by THIS call so a failure only releases our own stream
  let stream: MediaStream | null = null

  try {
    stream = await navigator.mediaDevices.getUserMedia({ audio: true })

    const recorder = new MediaRecorder(stream, {
      mimeType: 'audio/webm;codecs=opus'
    })

    recorder.ondataavailable = (event) => {
      if (event.data.size > 0) {
        audioChunks.push(event.data)
      }
    }

    // Reset state for new recording
    audioChunks = []
    lastTranscriptLength = 0

    // Start recording
    recorder.start(100) // Collect data every 100ms

    // Publish the new stream/recorder only once recording is actually running
    mediaStream = stream
    mediaRecorder = recorder
    isRecording.value = true

    // Never let two periodic senders run at once
    if (chunkInterval) {
      clearInterval(chunkInterval)
      chunkInterval = null
    }

    // Send chunks periodically for progressive transcription
    chunkInterval = window.setInterval(() => {
      if (audioChunks.length > 0 && whisperSocket?.readyState === WebSocket.OPEN) {
        sendAudioChunk(false) // false = partial, don't clear
      }
    }, CHUNK_INTERVAL_MS)
  } catch (e: any) {
    // Release the microphone if setup failed after the stream was granted
    if (stream) {
      stream.getTracks().forEach(track => track.stop())
    }
    error.value = `Microphone error: ${e.message}`
    console.error('[Voice] Microphone error:', e)
  }
}
|
||||
|
||||
/**
 * Encode the accumulated audio as base64 and send it to the Whisper server.
 *
 * @param isFinal - true for the last request of a recording (the buffer is
 *   cleared and the request is sent with `partial: false`); false for a
 *   progressive request that keeps the buffer.
 *
 * NOTE(review): when a final chunk is skipped as too small, or the socket is not
 * open, nothing is sent and no response will arrive - callers waiting for a
 * final transcription should confirm they cannot wait on it indefinitely.
 */
function sendAudioChunk(isFinal: boolean) {
  if (audioChunks.length === 0) return

  // Always send ALL accumulated audio (webm needs header from first chunk)
  const audioBlob = new Blob(audioChunks, { type: 'audio/webm' })
  const chunkCount = audioChunks.length

  // Skip if audio is too small (< 5KB) - WebM header alone is ~1-2KB
  if (audioBlob.size < 5000) {
    console.log(`[Voice] Skipping small chunk (${audioBlob.size} bytes)`)
    if (isFinal) {
      audioChunks = []
    }
    return
  }

  // Clear chunks only if final
  if (isFinal) {
    audioChunks = []
    lastTranscriptLength = 0
  }

  const reader = new FileReader()

  reader.onerror = () => {
    console.error('[Voice] Failed to read audio data:', reader.error)
  }

  // `load` fires only on success, so `result` is the data URL here;
  // the type check keeps a non-string result from reaching `.split`.
  reader.onload = () => {
    const dataUrl = reader.result
    if (typeof dataUrl !== 'string') return

    const base64 = dataUrl.split(',')[1]

    if (whisperSocket?.readyState === WebSocket.OPEN) {
      whisperSocket.send(JSON.stringify({
        type: 'transcribe',
        audio: base64,
        language: 'es',
        partial: !isFinal
      }))
      console.log(`[Voice] Sent ${isFinal ? 'FINAL' : 'partial'} audio (${chunkCount} chunks, ${audioBlob.size} bytes)`)
    } else {
      console.warn(`[Voice] Whisper socket not open, dropped ${isFinal ? 'FINAL' : 'partial'} audio`)
    }
  }

  reader.readAsDataURL(audioBlob)
}
|
||||
|
||||
/**
 * Stop a Whisper recording: cancel the periodic sender, send whatever audio is
 * buffered as the final request, stop the recorder and release the microphone.
 */
function stopWhisperRecording() {
  // Cancel the periodic partial sender
  if (chunkInterval) {
    clearInterval(chunkInterval)
    chunkInterval = null
  }

  // Whatever is buffered goes out as the final request
  if (audioChunks.length > 0) {
    sendAudioChunk(true) // true = final
  }

  // Stop the recorder unless it is already inactive
  const recorder = mediaRecorder
  if (recorder && recorder.state !== 'inactive') {
    recorder.stop()
  }

  // Release the microphone
  if (mediaStream) {
    for (const track of mediaStream.getTracks()) {
      track.stop()
    }
    mediaStream = null
  }

  isRecording.value = false
}
|
||||
|
||||
function toggleRecording() {
|
||||
if (isRecording.value) {
|
||||
stopRecording()
|
||||
@@ -124,6 +444,12 @@ function toggleRecording() {
|
||||
|
||||
function startRecording() {
|
||||
error.value = ''
|
||||
|
||||
if (useWhisper.value && whisperReady.value) {
|
||||
// Use Whisper GPU mode
|
||||
startWhisperRecording()
|
||||
} else {
|
||||
// Use Web Speech API
|
||||
if (!recognition) {
|
||||
recognition = initRecognition()
|
||||
}
|
||||
@@ -135,19 +461,30 @@ function startRecording() {
|
||||
console.error('[Voice] Failed to start:', e)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
function stopRecording() {
|
||||
if (useWhisper.value) {
|
||||
stopWhisperRecording()
|
||||
} else {
|
||||
if (recognition) {
|
||||
recognition.stop()
|
||||
}
|
||||
isRecording.value = false
|
||||
}
|
||||
interimTranscript.value = ''
|
||||
}
|
||||
|
||||
function clearTranscript() {
|
||||
transcript.value = ''
|
||||
interimTranscript.value = ''
|
||||
animatedTranscript.value = ''
|
||||
lastAnimatedLength = 0
|
||||
if (typingTimeout) {
|
||||
clearTimeout(typingTimeout)
|
||||
typingTimeout = null
|
||||
}
|
||||
}
|
||||
|
||||
function connectSocket() {
|
||||
@@ -209,6 +546,7 @@ function sendTranscript() {
|
||||
|
||||
function close() {
|
||||
stopRecording()
|
||||
clearTranscript()
|
||||
isOpen.value = false
|
||||
}
|
||||
|
||||
@@ -290,21 +628,31 @@ function handleKeyUp(e: KeyboardEvent) {
|
||||
holdTimeout = null
|
||||
}
|
||||
|
||||
// If was push-to-talk recording, stop and send after 1200ms
|
||||
// If was push-to-talk recording, continue recording for 1.5s buffer then stop
|
||||
if (isPushToTalk.value && isRecording.value) {
|
||||
console.log('[Voice] Stopping recording, will send in 1200ms')
|
||||
stopRecording()
|
||||
console.log('[Voice] Key released, continuing recording for 1.5s buffer...')
|
||||
|
||||
// Keep recording for 1.5s more (UX buffer for trailing words)
|
||||
setTimeout(() => {
|
||||
console.log('[Voice] Buffer complete, stopping recording')
|
||||
stopRecording()
|
||||
|
||||
if (useWhisper.value) {
|
||||
// For Whisper: wait for server response (handled in onmessage)
|
||||
console.log('[Voice] Waiting for Whisper transcription...')
|
||||
pendingWhisperSend = true
|
||||
} else {
|
||||
// For Web Speech API: send after short delay for final results
|
||||
setTimeout(() => {
|
||||
console.log('[Voice] Sending transcript:', transcript.value.trim())
|
||||
console.log('[Voice] Socket state:', socket?.readyState)
|
||||
if (transcript.value.trim()) {
|
||||
sendTranscriptAndClose()
|
||||
} else {
|
||||
// No transcript, just close
|
||||
isPushToTalk.value = false
|
||||
close()
|
||||
}
|
||||
}, 1200)
|
||||
}, 300)
|
||||
}
|
||||
}, 1500)
|
||||
}
|
||||
|
||||
keyDownTime = 0
|
||||
@@ -349,17 +697,74 @@ function sendTranscriptAndClose() {
|
||||
typeChar()
|
||||
}
|
||||
|
||||
onMounted(() => {
|
||||
// Typing animation effect
|
||||
/**
 * Reveal `targetText` in `animatedTranscript` one character at a time,
 * continuing from the number of characters already shown.
 *
 * @param targetText - The full text that should end up displayed. If it is
 *   shorter than what is currently shown, it is applied immediately without
 *   animation.
 */
function animateTyping(targetText: string) {
  // A new target supersedes any step that is still scheduled
  if (typingTimeout) {
    clearTimeout(typingTimeout)
    typingTimeout = null
  }

  // Shorter text (cleared or corrected): snap to it instead of animating
  if (targetText.length < animatedTranscript.value.length) {
    animatedTranscript.value = targetText
    lastAnimatedLength = targetText.length
    return
  }

  // Show the first `count` characters, then schedule the next one if any remain
  const revealUpTo = (count: number) => {
    if (count > targetText.length) return

    animatedTranscript.value = targetText.substring(0, count)
    lastAnimatedLength = count

    if (count === targetText.length) return

    // 15-25ms per character
    const delay = 15 + Math.random() * 10
    typingTimeout = window.setTimeout(() => revealUpTo(count + 1), delay)
  }

  // Resume from where the previous animation left off
  revealUpTo(lastAnimatedLength)
}
|
||||
|
||||
// Watch transcript changes for typing animation
|
||||
watch(transcript, (newVal) => {
|
||||
animateTyping(newVal)
|
||||
})
|
||||
|
||||
onMounted(async () => {
|
||||
recognition = initRecognition()
|
||||
// Use capture phase to intercept before terminal or other elements
|
||||
document.addEventListener('keydown', handleKeyDown, { capture: true })
|
||||
document.addEventListener('keyup', handleKeyUp, { capture: true })
|
||||
|
||||
// Check Whisper status on mount
|
||||
const status = await checkWhisperStatus()
|
||||
|
||||
// If server is starting (page was reloaded during startup), continue polling
|
||||
if (status?.starting) {
|
||||
console.log('[Voice] Server is starting, resuming polling...')
|
||||
pollWhisperStatus()
|
||||
} else if (useWhisper.value) {
|
||||
connectWhisperSocket()
|
||||
}
|
||||
})
|
||||
|
||||
onBeforeUnmount(() => {
|
||||
stopRecording()
|
||||
recognition = null
|
||||
disconnectSocket()
|
||||
disconnectWhisperSocket()
|
||||
if (chunkInterval) clearInterval(chunkInterval)
|
||||
if (typingTimeout) clearTimeout(typingTimeout)
|
||||
if (mediaStream) {
|
||||
mediaStream.getTracks().forEach(track => track.stop())
|
||||
}
|
||||
document.removeEventListener('keydown', handleKeyDown, { capture: true })
|
||||
document.removeEventListener('keyup', handleKeyUp, { capture: true })
|
||||
document.removeEventListener('mousemove', onDrag)
|
||||
@@ -408,8 +813,24 @@ defineExpose({
|
||||
</svg>
|
||||
<span>Voice</span>
|
||||
<i class="dot" :class="{ recording: isRecording, ptt: isPushToTalk }"></i>
|
||||
<span class="mode-badge" :class="{ gpu: useWhisper }">
|
||||
{{ useWhisper ? 'GPU' : 'Web' }}
|
||||
</span>
|
||||
</div>
|
||||
<div class="window-controls">
|
||||
<button
|
||||
class="whisper-toggle"
|
||||
:class="{ active: useWhisper, loading: whisperLoading }"
|
||||
:disabled="whisperLoading"
|
||||
@click.stop="toggleWhisperMode"
|
||||
:title="whisperLoading ? 'Starting Whisper server...' : (useWhisper ? 'Using Whisper GPU - Click to use Web Speech' : 'Using Web Speech - Click to use Whisper GPU')"
|
||||
>
|
||||
<svg width="10" height="10" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2">
|
||||
<rect x="4" y="4" width="16" height="16" rx="2"/>
|
||||
<line x1="9" y1="9" x2="9" y2="15"/>
|
||||
<line x1="15" y1="9" x2="15" y2="15"/>
|
||||
</svg>
|
||||
</button>
|
||||
<button class="x" @click="close" title="Close">
|
||||
<svg width="8" height="8" viewBox="0 0 10 10">
|
||||
<line x1="0" y1="0" x2="10" y2="10" stroke="currentColor" stroke-width="1.5"/>
|
||||
@@ -421,10 +842,10 @@ defineExpose({
|
||||
|
||||
<!-- Content -->
|
||||
<div class="content">
|
||||
<div class="transcript" :class="{ empty: !transcript && !interimTranscript }">
|
||||
<span class="final">{{ transcript }}</span>
|
||||
<div class="transcript" :class="{ empty: !animatedTranscript && !interimTranscript }">
|
||||
<span class="final">{{ animatedTranscript }}</span><span class="cursor" v-if="animatedTranscript && animatedTranscript.length < transcript.length">|</span>
|
||||
<span class="interim">{{ interimTranscript }}</span>
|
||||
<span v-if="!transcript && !interimTranscript" class="placeholder">
|
||||
<span v-if="!animatedTranscript && !interimTranscript" class="placeholder">
|
||||
Presiona el micrófono o mantén Ctrl+Space...
|
||||
</span>
|
||||
</div>
|
||||
@@ -545,6 +966,58 @@ defineExpose({
|
||||
box-shadow: 0 0 6px #f90;
|
||||
}
|
||||
|
||||
.mode-badge {
|
||||
font-size: 8px;
|
||||
padding: 1px 4px;
|
||||
border-radius: 3px;
|
||||
background: rgba(0, 0, 0, 0.2);
|
||||
color: #555;
|
||||
font-weight: 600;
|
||||
text-transform: uppercase;
|
||||
}
|
||||
|
||||
.mode-badge.gpu {
|
||||
background: linear-gradient(135deg, #10b981, #059669);
|
||||
color: #fff;
|
||||
box-shadow: 0 0 4px rgba(16, 185, 129, 0.5);
|
||||
}
|
||||
|
||||
.whisper-toggle {
|
||||
width: 20px;
|
||||
height: 18px;
|
||||
display: flex;
|
||||
align-items: center;
|
||||
justify-content: center;
|
||||
background: rgba(255, 255, 255, 0.3);
|
||||
border: 1px solid rgba(0, 0, 0, 0.1);
|
||||
border-radius: 3px;
|
||||
color: #666;
|
||||
cursor: pointer;
|
||||
transition: all 0.15s;
|
||||
}
|
||||
|
||||
.whisper-toggle:hover:not(:disabled) {
|
||||
background: rgba(255, 255, 255, 0.5);
|
||||
}
|
||||
|
||||
.whisper-toggle:disabled {
|
||||
cursor: not-allowed;
|
||||
opacity: 0.6;
|
||||
}
|
||||
|
||||
.whisper-toggle.active {
|
||||
background: linear-gradient(180deg, #10b981 0%, #059669 100%);
|
||||
border-color: #047857;
|
||||
color: #fff;
|
||||
}
|
||||
|
||||
.whisper-toggle.loading {
|
||||
animation: pulse 0.6s infinite;
|
||||
background: linear-gradient(180deg, #f59e0b 0%, #d97706 100%);
|
||||
border-color: #b45309;
|
||||
color: #fff;
|
||||
}
|
||||
|
||||
@keyframes pulse {
|
||||
0%, 100% { opacity: 1; }
|
||||
50% { opacity: 0.5; }
|
||||
@@ -601,6 +1074,17 @@ defineExpose({
|
||||
font-style: italic;
|
||||
}
|
||||
|
||||
.transcript .cursor {
|
||||
color: #4a9;
|
||||
font-weight: bold;
|
||||
animation: blink 0.6s infinite;
|
||||
}
|
||||
|
||||
@keyframes blink {
|
||||
0%, 50% { opacity: 1; }
|
||||
51%, 100% { opacity: 0; }
|
||||
}
|
||||
|
||||
.transcript .placeholder {
|
||||
color: #888;
|
||||
}
|
||||
|
||||
@@ -223,6 +223,112 @@ export function createGlobalHandlers(callbacks: ToolManagementCallbacks): ToolCo
|
||||
}, 100)
|
||||
return 'Recargando pagina...'
|
||||
}
|
||||
},
|
||||
{
  name: 'whisper_status',
  description: 'Obtiene el estado del servidor Whisper GPU para speech-to-text.',
  category: 'global',
  schema: {
    type: 'object',
    properties: {}
  },
  // Queries the status endpoint and formats the answer as plain text.
  // Failures are returned as text rather than thrown.
  handler: async () => {
    try {
      const res = await fetch(`http://${window.location.hostname}:4100/api/whisper/status`)
      // Do not format an HTTP error body as if it were a status payload
      if (!res.ok) {
        return `Error checking Whisper status: HTTP ${res.status}`
      }
      const data = await res.json()

      // The payload can report a server that is still starting; show that
      // instead of a plain "No" so it is not mistaken for a stopped server.
      const running = data.running ? 'Yes' : (data.starting ? 'Starting...' : 'No')

      return `Whisper GPU Status:\n` +
        ` Enabled: ${data.enabled ? 'Yes' : 'No'}\n` +
        ` Running: ${running}\n` +
        ` Model: ${data.model}\n` +
        ` Device: ${data.device}\n` +
        ` Port: ${data.port}`
    } catch (e: any) {
      return `Error checking Whisper status: ${e.message}`
    }
  }
},
|
||||
{
  name: 'whisper_toggle',
  description: 'Activa o desactiva Whisper GPU para speech-to-text. Cuando esta activo usa la GPU para transcribir voz con mejor precision para acentos latinos.',
  category: 'global',
  schema: {
    type: 'object',
    properties: {}
  },
  // Calls the toggle endpoint and describes the resulting mode as plain text.
  // Failures are returned as text rather than thrown.
  handler: async () => {
    try {
      const res = await fetch(`http://${window.location.hostname}:4100/api/whisper/toggle`, {
        method: 'POST'
      })
      // Do not report an HTTP error as "disabled"
      if (!res.ok) {
        return `Error toggling Whisper: HTTP ${res.status}`
      }
      const data = await res.json()

      // The toggle endpoint can answer that the server is still starting (the
      // voice UI polls in that case); report it instead of a misleading state.
      if (data.starting) {
        return `Whisper GPU server is starting...\n\n` +
          `Use whisper_status to check when it is ready.`
      }

      if (data.enabled) {
        return `Whisper GPU ENABLED\n` +
          ` Model: ${data.model}\n` +
          ` Device: ${data.device}\n` +
          ` Port: ws://localhost:${data.port}\n\n` +
          `Voice input will now use GPU-accelerated transcription.`
      } else {
        return `Whisper GPU DISABLED\n\n` +
          `Voice input will use Web Speech API (browser native).`
      }
    } catch (e: any) {
      return `Error toggling Whisper: ${e.message}`
    }
  }
},
|
||||
{
  name: 'whisper_start',
  description: 'Inicia el servidor Whisper GPU si no esta corriendo.',
  category: 'global',
  schema: {
    type: 'object',
    properties: {}
  },
  // Calls the start endpoint and reports the outcome as plain text.
  // Failures are returned as text rather than thrown.
  handler: async () => {
    try {
      const response = await fetch(`http://${window.location.hostname}:4100/api/whisper/start`, {
        method: 'POST'
      })
      const result = await response.json()

      // Failure reported by the server: pass its message along
      if (!result.success) {
        return `Failed to start Whisper server: ${result.message}`
      }

      return [
        `Whisper server started!`,
        ` Model: ${result.model}`,
        ` Device: ${result.device}`,
        ` Ready: ${result.running ? 'Yes' : 'Loading...'}`
      ].join('\n')
    } catch (e: any) {
      return `Error starting Whisper: ${e.message}`
    }
  }
},
|
||||
{
  name: 'whisper_stop',
  description: 'Detiene el servidor Whisper GPU para liberar memoria de la GPU.',
  category: 'global',
  schema: {
    type: 'object',
    properties: {}
  },
  // Calls the stop endpoint and reports the outcome as plain text.
  // Failures are returned as text rather than thrown.
  handler: async () => {
    try {
      const response = await fetch(`http://${window.location.hostname}:4100/api/whisper/stop`, {
        method: 'POST'
      })
      const result = await response.json()

      return result.success
        ? `Whisper server stopped. GPU memory released.`
        : `Failed to stop Whisper server: ${result.message}`
    } catch (e: any) {
      return `Error stopping Whisper: ${e.message}`
    }
  }
}
|
||||
]
|
||||
}
|
||||
|
||||
@@ -3,7 +3,10 @@
|
||||
"version": "1.0.0",
|
||||
"description": "Dynamic canvas for Claude Code interaction",
|
||||
"scripts": {
|
||||
"start": "concurrently -n server,frontend -c blue,green \"cd server && bun --watch run index.ts\" \"cd frontend && bun run dev --host\""
|
||||
"start": "concurrently -n api,terminal,frontend -c blue,yellow,green \"cd server && bun --watch run index.ts\" \"cd server && bun run terminal.ts\" \"cd frontend && bun run dev --host\"",
|
||||
"start:api": "cd server && bun --watch run index.ts",
|
||||
"start:terminal": "cd server && bun run terminal.ts",
|
||||
"start:frontend": "cd frontend && bun run dev --host"
|
||||
},
|
||||
"devDependencies": {
|
||||
"concurrently": "^9.2.1"
|
||||
|
||||
@@ -1,7 +1,6 @@
|
||||
import { PORT_HTTP, WORKING_DIR } from './config'
|
||||
import { initDatabase } from './db'
|
||||
import { handleRequest } from './routes'
|
||||
import { startTerminalServer } from './services/terminal'
|
||||
|
||||
// Initialize database
|
||||
initDatabase()
|
||||
@@ -12,18 +11,10 @@ Bun.serve({
|
||||
fetch: handleRequest
|
||||
})
|
||||
|
||||
console.log(`[HTTP] API running at http://localhost:${PORT_HTTP}`)
|
||||
|
||||
// Start Terminal WebSocket server
|
||||
startTerminalServer()
|
||||
|
||||
// Startup summary
|
||||
console.log('')
|
||||
console.log('='.repeat(50))
|
||||
console.log('Agent UI Server started')
|
||||
console.log('Agent UI API Server (hot-reload enabled)')
|
||||
console.log(` API: http://localhost:${PORT_HTTP}`)
|
||||
console.log(` Terminal: ws://localhost:4103`)
|
||||
console.log(` Working Dir: ${WORKING_DIR}`)
|
||||
console.log('')
|
||||
console.log('WebMCP starts separately with Claude Code MCP')
|
||||
console.log('='.repeat(50))
|
||||
|
||||
@@ -7,6 +7,7 @@ import { handleThemes, handleActiveTheme, handleDesignTokens, handleThemeById, h
|
||||
import { handleCanvas, handleCanvasById, handleToolbarCanvas, handleDefaultCanvas, handleCanvasComponents, handleCanvasComponentById } from './canvas'
|
||||
import { handleGiteaRepo, handleGiteaTree, handleGiteaFile } from './gitea'
|
||||
import { handleTables, handleStats, handleTableSchema, handleTableData, handleQuery } from './database'
|
||||
import { handleWhisperRoutes } from './whisper'
|
||||
|
||||
export async function handleRequest(req: Request): Promise<Response> {
|
||||
const url = new URL(req.url)
|
||||
@@ -168,5 +169,11 @@ export async function handleRequest(req: Request): Promise<Response> {
|
||||
return handleQuery(req)
|
||||
}
|
||||
|
||||
// Whisper (GPU speech-to-text)
|
||||
if (path.startsWith('/api/whisper/')) {
|
||||
const res = await handleWhisperRoutes(req)
|
||||
if (res) return res
|
||||
}
|
||||
|
||||
return notFoundResponse()
|
||||
}
|
||||
|
||||
66
server/routes/whisper.ts
Normal file
66
server/routes/whisper.ts
Normal file
@@ -0,0 +1,66 @@
|
||||
/**
|
||||
* Whisper API routes
|
||||
* Control the local GPU-accelerated speech-to-text server
|
||||
*/
|
||||
|
||||
import {
|
||||
startWhisperServer,
|
||||
stopWhisperServer,
|
||||
toggleWhisperServer,
|
||||
getWhisperState,
|
||||
getWhisperPort
|
||||
} from '../services/whisper'
|
||||
|
||||
/**
 * Dispatch a request under `/api/whisper/` to the matching Whisper control action.
 *
 * Supported routes:
 *   GET  /api/whisper/status  - current state
 *   POST /api/whisper/start   - start the server and wait for the outcome
 *   POST /api/whisper/stop    - stop the server
 *   POST /api/whisper/toggle  - flip on/off (a start runs in the background)
 *   GET  /api/whisper/port    - WebSocket port and URL
 *
 * @param req Incoming HTTP request.
 * @returns A JSON response for a recognised method/path pair, or `null` so the
 *          caller can fall through to its own not-found handling.
 */
export async function handleWhisperRoutes(req: Request): Promise<Response | null> {
  const { pathname } = new URL(req.url)

  switch (`${req.method} ${pathname}`) {
    case 'GET /api/whisper/status':
      return Response.json(await getWhisperState())

    case 'POST /api/whisper/start': {
      const success = await startWhisperServer()
      const state = await getWhisperState()
      return Response.json({
        success,
        ...state,
        message: success ? 'Whisper server started' : 'Failed to start Whisper server'
      })
    }

    case 'POST /api/whisper/stop': {
      const success = stopWhisperServer()
      const state = await getWhisperState()
      return Response.json({
        success,
        ...state,
        message: success ? 'Whisper server stopped' : 'Failed to stop Whisper server'
      })
    }

    case 'POST /api/whisper/toggle': {
      const result = await toggleWhisperServer()
      const state = await getWhisperState()

      // A toggle that triggers a start returns before the server is ready, so
      // `enabled` is still false at this point. Report that as "starting"
      // instead of claiming Whisper is disabled.
      let message = 'Whisper disabled (using Web Speech API)'
      if (state.enabled) {
        message = 'Whisper enabled (GPU)'
      } else if (state.starting) {
        message = 'Whisper starting (loading model)...'
      }

      // Fields from the live state deliberately override the toggle result.
      return Response.json({ ...result, ...state, message })
    }

    case 'GET /api/whisper/port': {
      const port = getWhisperPort()
      return Response.json({ port, url: `ws://localhost:${port}` })
    }

    default:
      return null
  }
}
|
||||
247
server/services/whisper.ts
Normal file
247
server/services/whisper.ts
Normal file
@@ -0,0 +1,247 @@
|
||||
/**
|
||||
* Whisper Service - Manages the Python Whisper server process
|
||||
* Provides GPU-accelerated speech-to-text as an alternative to Web Speech API
|
||||
*/
|
||||
|
||||
import { join } from 'path'
|
||||
import { Subprocess } from 'bun'
|
||||
|
||||
/** TCP port of the Whisper WebSocket server (whisper_server.py uses the same number). */
const WHISPER_PORT = 4104

/** Location of the Python server script, one directory above this module. */
const WHISPER_SCRIPT = join(import.meta.dir, '..', 'whisper_server.py')

/** In-memory bookkeeping for the managed Whisper process. */
type WhisperState = {
  /** Whisper is switched on. */
  enabled: boolean
  /** The port was seen listening. */
  running: boolean
  /** A start attempt is in flight; guards against concurrent starts. */
  starting: boolean
  /** Handle of the spawned Python process, when this module spawned one. */
  process: Subprocess | null
  /** Model name reported to clients. */
  model: string
  /** Device name reported to clients. */
  device: string
}

// Single module-level instance; every exported function below reads or mutates it.
const state: WhisperState = {
  enabled: false,
  running: false,
  starting: false,
  process: null,
  model: 'large-v3',
  device: 'cuda'
}
|
||||
|
||||
/**
 * Force-stop whatever process is listening on the given local TCP port.
 *
 * Best effort: relies on PowerShell's Get-NetTCPConnection, and a failure is
 * logged rather than thrown so callers can proceed regardless.
 *
 * @param port Local TCP port to free.
 */
async function killProcessOnPort(port: number): Promise<void> {
  // Only the listening socket's owner is targeted. Without the state filter,
  // every connection row on that local port is piped to Stop-Process.
  const script =
    `Get-NetTCPConnection -LocalPort ${port} -State Listen -ErrorAction SilentlyContinue | ForEach-Object { Stop-Process -Id $_.OwningProcess -Force -ErrorAction SilentlyContinue }`

  try {
    // -NoProfile matches checkPort and avoids running user profile scripts.
    const proc = Bun.spawn(['powershell', '-NoProfile', '-Command', script], {
      stdout: 'ignore',
      stderr: 'ignore'
    })
    await proc.exited

    // Give the OS a moment to release the port
    await new Promise(resolve => setTimeout(resolve, 1000))
  } catch (err) {
    // Still non-fatal, but no longer invisible
    console.warn(`[Whisper] Could not clean up port ${port}:`, err)
  }
}
|
||||
|
||||
/**
 * Start the Whisper Python server and wait until its port is listening.
 *
 * Sequence: free the port, spawn `python -u whisper_server.py`, probe the port
 * after 2 s and then every 3 s for up to 40 more attempts (about 120 s).
 *
 * @returns `true` when the server is already running or became ready;
 *          `false` when another start is in flight, the process exited,
 *          the wait timed out, or spawning failed.
 */
export async function startWhisperServer(): Promise<boolean> {
  // Prevent multiple simultaneous start attempts
  if (state.starting) {
    return false
  }

  if (state.running && state.process) {
    return true
  }

  state.starting = true
  console.log(`[Whisper] Starting (${state.model})...`)

  try {
    // Kill any existing process on the port
    await killProcessOnPort(WHISPER_PORT)

    // stdout/stderr are inherited so the Python logs appear in this console;
    // -u and PYTHONUNBUFFERED disable Python output buffering.
    const proc = Bun.spawn(['python', '-u', WHISPER_SCRIPT], {
      cwd: join(import.meta.dir, '..'),
      stdout: 'inherit',
      stderr: 'inherit',
      env: { ...process.env, PYTHONUNBUFFERED: '1' }
    })

    state.process = proc

    // Attempt 0 is the quick check after 2 s. Attempts 1..40 cover slow model loads.
    const SLOW_ATTEMPTS = 40
    for (let attempt = 0; attempt <= SLOW_ATTEMPTS; attempt++) {
      const delayMs = attempt === 0 ? 2000 : 3000
      await new Promise(resolve => setTimeout(resolve, delayMs))

      if (proc.exitCode !== null) {
        if (attempt === 0) {
          console.error('[Whisper] Process exited with code:', proc.exitCode)
        } else {
          console.error('[Whisper] Process died')
        }
        state.process = null
        return false
      }

      if (await checkPort(WHISPER_PORT)) {
        console.log('[Whisper] Ready')
        state.running = true
        state.enabled = true
        return true
      }
    }

    // The process is left alive on purpose: it may still finish loading, and
    // getWhisperState picks the listener up once the port opens.
    console.error('[Whisper] Timeout (120s)')
    return false
  } catch (err: any) {
    const reason = err instanceof Error ? err.message : String(err)
    console.error('[Whisper] Error:', reason)
    state.process = null
    return false
  } finally {
    // Single reset point: the guard flag can never stay stuck on any exit path.
    state.starting = false
  }
}
|
||||
|
||||
/**
 * Report whether something is listening on the given local TCP port.
 *
 * Runs a PowerShell one-liner that prints `LISTENING` when
 * Get-NetTCPConnection finds a socket in the Listen state.
 *
 * @param port Local TCP port to probe.
 * @returns `true` when the probe printed `LISTENING`; `false` otherwise,
 *          including when the probe itself could not be run.
 */
async function checkPort(port: number): Promise<boolean> {
  const probeScript =
    `$c = Get-NetTCPConnection -LocalPort ${port} -State Listen -ErrorAction SilentlyContinue; if ($c) { Write-Output 'LISTENING' }`

  try {
    const probe = Bun.spawn(['powershell', '-NoProfile', '-Command', probeScript], {
      stdout: 'pipe',
      stderr: 'ignore'
    })

    // Drain stdout before waiting for the exit
    const printed = await new Response(probe.stdout).text()
    await probe.exited

    return printed.trim() === 'LISTENING'
  } catch {
    return false
  }
}
|
||||
|
||||
/**
 * Stop the Whisper server.
 *
 * With a process handle, the child is killed directly. With no handle but a
 * listener that getWhisperState adopted (`running` is true), the listener is
 * removed by port, so a stop request is never reported as successful while
 * doing nothing.
 *
 * @returns `true` when there was nothing to stop or the stop was issued;
 *          `false` when killing the child process threw.
 */
export function stopWhisperServer(): boolean {
  const child = state.process

  if (!child) {
    if (state.running) {
      // No handle to kill, so free the port instead. This function must stay
      // synchronous, so the cleanup runs in the background; a status probe
      // issued immediately afterwards may still see the listener.
      void killProcessOnPort(WHISPER_PORT)
      state.running = false
      state.enabled = false
      console.log('[Whisper] Stopped')
    }
    return true
  }

  try {
    child.kill()
    state.process = null
    state.running = false
    state.enabled = false
    console.log('[Whisper] Stopped')
    return true
  } catch (err) {
    console.error('[Whisper] Stop error:', err)
    return false
  }
}
|
||||
|
||||
/**
 * Flip the Whisper server on or off.
 *
 * Stopping happens synchronously. Starting is kicked off in the background and
 * this function returns at once, so the caller is expected to poll the status.
 *
 * @returns `enabled` is always `false` here; `success` is `false` while a start
 *          is already in flight or when stopping failed; `starting` tells
 *          whether a start is in progress.
 */
export async function toggleWhisperServer(): Promise<{ enabled: boolean; success: boolean; starting: boolean }> {
  // A start is already running: refuse to toggle
  if (state.starting) {
    return { enabled: false, success: false, starting: true }
  }

  const isActive = state.enabled && state.running
  if (isActive) {
    return { enabled: false, success: stopWhisperServer(), starting: false }
  }

  // Deliberately not awaited: the start can take a long time
  startWhisperServer().catch(err => {
    console.error('[Whisper] Start error:', err)
    state.starting = false
  })

  return { enabled: false, success: true, starting: true }
}
|
||||
|
||||
/**
 * Snapshot of the Whisper state, reconciled with the real port status.
 *
 * Unless a start is in flight, the port is probed and `running`/`enabled` are
 * updated to match; when the listener has gone, the process handle is dropped.
 *
 * @returns The public state fields plus the fixed port number.
 */
export async function getWhisperState(): Promise<{
  enabled: boolean
  running: boolean
  starting: boolean
  port: number
  model: string
  device: string
}> {
  // The probe is skipped while starting so it does not interfere with startup
  if (!state.starting) {
    const listening = await checkPort(WHISPER_PORT)

    // Only act when the bookkeeping disagrees with reality
    if (listening !== state.running) {
      state.running = listening
      state.enabled = listening
      if (!listening) {
        state.process = null
      }
    }
  }

  const { enabled, running, starting, model, device } = state
  return { enabled, running, starting, port: WHISPER_PORT, model, device }
}
|
||||
|
||||
/**
 * Tell whether Whisper is both enabled and running, according to the cached
 * state (no port probe is made).
 */
export function isWhisperEnabled(): boolean {
  const { enabled, running } = state
  return enabled && running
}
|
||||
|
||||
// WebSocket server for Whisper (proxies to Python server or handles directly)
|
||||
let whisperWsServer: any = null
|
||||
|
||||
/** Port number of the Whisper WebSocket server. */
export function getWhisperPort(): number {
  const port: number = WHISPER_PORT
  return port
}
|
||||
22
server/terminal.ts
Normal file
22
server/terminal.ts
Normal file
@@ -0,0 +1,22 @@
|
||||
#!/usr/bin/env bun
|
||||
/**
|
||||
* Terminal Server - Independent process
|
||||
* This runs separately from the main server to maintain stable Claude Code sessions
|
||||
* even when the main server restarts due to code changes.
|
||||
*/
|
||||
|
||||
import { startTerminalServer } from './services/terminal'
|
||||
import { WORKING_DIR } from './config'
|
||||
|
||||
// Startup banner: one console.log call per line, in this order
const bannerLines: string[] = [
  '',
  '='.repeat(50),
  'Terminal Server (Independent Process)',
  ` WebSocket: ws://localhost:4103`,
  ` Working Dir: ${WORKING_DIR}`,
  '',
  'This process is stable and won\'t restart',
  'when the main server reloads.',
  '='.repeat(50),
  ''
]

for (const line of bannerLines) {
  console.log(line)
}

// Start the terminal WebSocket server once the banner is out
startTerminalServer()
|
||||
314
server/whisper_server.py
Normal file
314
server/whisper_server.py
Normal file
@@ -0,0 +1,314 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Whisper Server - GPU-accelerated speech-to-text using faster-whisper
|
||||
WebSocket server that receives audio and returns transcriptions
|
||||
"""
|
||||
|
||||
import asyncio
|
||||
import json
|
||||
import sys
|
||||
import io
|
||||
import wave
|
||||
import tempfile
|
||||
import os
|
||||
import subprocess
|
||||
from pathlib import Path
|
||||
|
||||
try:
|
||||
import websockets
|
||||
from faster_whisper import WhisperModel
|
||||
except ImportError as e:
|
||||
print(f"Missing dependency: {e}")
|
||||
print("Run: pip install faster-whisper websockets")
|
||||
sys.exit(1)
|
||||
|
||||
|
||||
def convert_audio_to_wav(input_data: bytes, input_format: str = "webm") -> "bytes | None":
    """
    Convert encoded audio bytes to WAV (16 kHz, mono, 16-bit PCM) using ffmpeg.

    Args:
        input_data: Raw audio bytes in the format named by ``input_format``.
        input_format: File extension (without the dot) given to the temporary
            input file.

    Returns:
        The contents of the converted WAV file, or ``None`` when ffmpeg is not
        installed, times out after 30 s, exits with a non-zero status, or any
        other error occurs during conversion (the reason is printed).
    """
    # A private scratch directory keeps the input and output paths distinct
    # even when input_format is "wav", and is removed with both files on exit.
    with tempfile.TemporaryDirectory(prefix="whisper_") as work_dir:
        input_path = os.path.join(work_dir, f"input.{input_format}")
        output_path = os.path.join(work_dir, "output.wav")

        with open(input_path, "wb") as in_file:
            in_file.write(input_data)

        try:
            # Use ffmpeg to convert to WAV (16kHz mono, which Whisper prefers)
            result = subprocess.run([
                "ffmpeg", "-y",        # Overwrite output
                "-i", input_path,      # Input file
                "-ar", "16000",        # Sample rate 16kHz
                "-ac", "1",            # Mono
                "-c:a", "pcm_s16le",   # PCM 16-bit little-endian
                output_path
            ], capture_output=True, text=True, timeout=30)

            if result.returncode != 0:
                print(f"[Whisper] ffmpeg error: {result.stderr}")
                return None

            # Read the converted WAV file
            with open(output_path, "rb") as f:
                return f.read()

        except subprocess.TimeoutExpired:
            print("[Whisper] ffmpeg conversion timed out")
            return None
        except FileNotFoundError:
            print("[Whisper] ffmpeg not found - please install ffmpeg")
            return None
        except Exception as e:
            # Best effort: callers treat None as "conversion failed"
            print(f"[Whisper] Conversion error: {e}")
            return None
|
||||
|
||||
# --- Server configuration ---------------------------------------------------
HOST = "localhost"          # Interface the WebSocket server binds to
PORT = 4104                 # WebSocket port
MODEL_SIZE = "large-v3"     # Best standard model for Spanish
DEVICE = "cuda"             # cuda or cpu
COMPUTE_TYPE = "float16"    # float16 for GPU, int8 for CPU

# Display name: last path component when MODEL_SIZE is a path-like identifier,
# otherwise MODEL_SIZE itself (split() on a string without "/" yields it whole).
MODEL_NAME = MODEL_SIZE.split("/")[-1]

# Spanish context prompt to improve accuracy (Honduras Spanish + tech context)
INITIAL_PROMPT = """Transcripción en español hondureño de un desarrollador de software.
Contexto: programación, TypeScript, Vue, Python, comandos de terminal, código.
Vocabulario técnico: servidor, frontend, backend, chunks, WebSocket, transcripción,
componente, función, variable, API, modelo, Whisper, Claude, MCP, configuración.
Expresiones hondureñas: vos, tenés, podés, mirá, pues, verdad, ajá, entonces.
Diminutivos comunes: ahorita, ratito, prontito, despuesito, chiquito, tantito, poquito."""

# --- Shared model state -------------------------------------------------------
model = None            # Loaded model instance, None until load_model() succeeds
model_loading = False   # True while a load is in progress
|
||||
|
||||
async def load_model():
    """
    Load the Whisper model once and return the shared instance.

    The first caller performs the load. A caller arriving while a load is in
    progress polls until it finishes. If loading with the configured device
    fails, the load is retried on CPU with int8, and DEVICE / COMPUTE_TYPE are
    updated so that what is reported to clients matches what is in use.

    Returns:
        The loaded model, or None for a waiting caller whose load failed.

    Raises:
        Exception: whatever the CPU fallback raises when it fails as well.
    """
    global model, model_loading, DEVICE, COMPUTE_TYPE

    if model is not None:
        return model

    if model_loading:
        # Wait for model to finish loading
        while model_loading:
            await asyncio.sleep(0.1)
        return model

    model_loading = True
    print(f"[Whisper] Loading model '{MODEL_NAME}' on {DEVICE}...")

    cache_dir = str(Path.home() / ".cache" / "whisper")

    try:
        try:
            # Load model - this downloads on first run
            model = WhisperModel(
                MODEL_SIZE,
                device=DEVICE,
                compute_type=COMPUTE_TYPE,
                download_root=cache_dir
            )
            print("[Whisper] Model loaded successfully!")
        except Exception as e:
            print(f"[Whisper] Error loading model: {e}")
            print("[Whisper] Falling back to CPU...")
            model = WhisperModel(
                MODEL_SIZE,
                device="cpu",
                compute_type="int8",
                download_root=cache_dir
            )
            # Keep the reported configuration in line with the fallback
            DEVICE = "cpu"
            COMPUTE_TYPE = "int8"
    finally:
        # Always clear the flag, otherwise waiting callers would poll forever
        # when the fallback fails too.
        model_loading = False

    return model
|
||||
|
||||
def transcribe_audio(audio_data: bytes, language: str = "es", is_webm: bool = True) -> dict:
    """
    Transcribe audio bytes with the loaded Whisper model.

    Args:
        audio_data: Encoded audio. Treated as WebM and converted to WAV through
            convert_audio_to_wav when ``is_webm`` is true, otherwise written to
            a .wav temp file as is.
        language: Language code passed to the model.
        is_webm: Whether ``audio_data`` needs conversion first.

    Returns:
        On success, a dict with ``success``, ``text``, ``language``,
        ``language_probability``, ``duration``, ``segments`` (start/end/text),
        ``engine``, ``model`` and ``device``. On failure, ``{"error": message}``.
    """
    if model is None:
        return {"error": "Model not loaded"}

    # Nothing to decode: answer directly instead of running ffmpeg on an empty file
    if not audio_data:
        return {"error": "No audio data received"}

    # Convert WebM to WAV if needed
    if is_webm:
        wav_data = convert_audio_to_wav(audio_data, "webm")
        if wav_data is None:
            return {"error": "Failed to convert audio format. Ensure ffmpeg is installed."}
    else:
        wav_data = audio_data

    # The model is handed a file path, so the audio goes through a temp file
    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as f:
        f.write(wav_data)
        temp_path = f.name

    try:
        # Transcribe with optimized parameters
        segments, info = model.transcribe(
            temp_path,
            language=language,
            beam_size=5,
            best_of=5,                         # Number of candidates when sampling
            temperature=0.0,                   # No sampling temperature
            vad_filter=True,                   # Voice activity detection
            vad_parameters=dict(
                min_silence_duration_ms=300,   # Shorter silence detection
                speech_pad_ms=200,             # Padding around speech
                threshold=0.5                  # VAD sensitivity (lower = more sensitive)
            ),
            initial_prompt=INITIAL_PROMPT,     # Context for better Spanish transcription
            condition_on_previous_text=True,   # Use context from previous segments
            no_speech_threshold=0.6,
            log_prob_threshold=-1.0,
            compression_ratio_threshold=2.4,
            word_timestamps=False              # Faster without word-level timestamps
        )

        # Iterating the segments is what is done inside this try, so any error
        # raised while they are produced is reported as a transcription error.
        segments_list = [
            {"start": segment.start, "end": segment.end, "text": segment.text}
            for segment in segments
        ]
        text = " ".join(item["text"] for item in segments_list)

        return {
            "success": True,
            "text": text.strip(),
            "language": info.language,
            "language_probability": info.language_probability,
            "duration": info.duration,
            "segments": segments_list,
            "engine": "whisper-gpu",
            "model": MODEL_NAME,
            "device": DEVICE
        }

    except Exception as e:
        print(f"[Whisper] Transcription error: {e}")
        return {"error": str(e)}

    finally:
        # Cleanup temp file; a failed delete must not mask the result
        try:
            os.unlink(temp_path)
        except OSError:
            pass
|
||||
|
||||
async def handle_client(websocket):
    """
    Serve one WebSocket client until it disconnects.

    Protocol, as implemented below:
      - on connect, a ``ready`` message with model and device is sent;
      - a binary frame is transcribed as Spanish WebM audio;
      - a text frame is a JSON command whose ``type`` is ``transcribe``
        (base64 ``audio``, optional ``language`` and ``partial``), ``ping``
        or ``status``; other types are ignored.

    Malformed input (invalid JSON, a non-object command, invalid base64) is
    answered with an ``error`` message and the connection stays open.
    """
    import base64
    import binascii

    # Ensure model is loaded
    await load_model()

    # Send ready message
    await websocket.send(json.dumps({
        "type": "ready",
        "model": MODEL_NAME,
        "device": DEVICE
    }))

    loop = asyncio.get_running_loop()

    async def send_error(text):
        await websocket.send(json.dumps({"type": "error", "message": text}))

    async def send_transcription(result):
        await websocket.send(json.dumps({"type": "transcription", **result}))

    try:
        async for message in websocket:
            if isinstance(message, bytes):
                # Binary audio data (likely WebM format from browser).
                # Transcription runs in the default executor so the event loop
                # stays responsive.
                result = await loop.run_in_executor(
                    None,
                    lambda data=message: transcribe_audio(data, "es", is_webm=True)
                )
                await send_transcription(result)
                continue

            # JSON command
            try:
                cmd = json.loads(message)
            except json.JSONDecodeError:
                await send_error("Invalid JSON")
                continue

            if not isinstance(cmd, dict):
                await send_error("Command must be a JSON object")
                continue

            cmd_type = cmd.get("type")

            if cmd_type == "transcribe":
                # Audio data sent as base64 (WebM format from browser)
                try:
                    audio_data = base64.b64decode(cmd.get("audio", ""))
                except (binascii.Error, ValueError, TypeError):
                    await send_error("Invalid base64 audio")
                    continue

                language = cmd.get("language", "es")
                is_partial = cmd.get("partial", False)

                result = await loop.run_in_executor(
                    None,
                    lambda data=audio_data, lang=language: transcribe_audio(data, lang, is_webm=True)
                )

                # Add partial flag to result
                if is_partial:
                    result["partial"] = True

                await send_transcription(result)

            elif cmd_type == "ping":
                await websocket.send(json.dumps({"type": "pong"}))

            elif cmd_type == "status":
                await websocket.send(json.dumps({
                    "type": "status",
                    "model": MODEL_NAME,
                    "device": DEVICE,
                    "ready": model is not None
                }))

    except websockets.exceptions.ConnectionClosed:
        # Normal disconnect
        pass
    except Exception as e:
        print(f"[Whisper] Error: {e}")
|
||||
|
||||
async def main():
    """Print the configuration, load the model, then serve WebSocket clients until cancelled."""
    banner = f"[Whisper] Model: {MODEL_NAME} | Device: {DEVICE} | Port: {PORT}"
    print(banner)

    # Load the model up front
    await load_model()

    server = websockets.serve(handle_client, HOST, PORT)
    async with server:
        print("[Whisper] Ready")
        # A future that is never resolved keeps the server alive
        never_done = asyncio.Future()
        await never_done
|
||||
|
||||
if __name__ == "__main__":
    # The import guard at the top of this file already exits with install
    # instructions when websockets is missing, so an install-on-demand branch
    # here could never run; the script just starts the server.
    asyncio.run(main())
|
||||
Reference in New Issue
Block a user