fix: Improve Whisper server startup with async polling and reduce logs
- Make server startup async to avoid Bun's 10s timeout
- Add frontend polling to detect when server is ready
- Use PowerShell Get-NetTCPConnection for reliable port detection
- Add starting state to prevent multiple simultaneous starts
- Reduce verbose logging, keep only essential info
- Add dev-dist and nul to gitignore
This commit is contained in:
@@ -50,7 +50,9 @@
|
|||||||
"mcp__agent-ui__localhost_4100-notificar",
|
"mcp__agent-ui__localhost_4100-notificar",
|
||||||
"mcp__agent-ui__localhost_4100-enviar_al_panel",
|
"mcp__agent-ui__localhost_4100-enviar_al_panel",
|
||||||
"mcp__agent-ui__localhost_4100-render_html",
|
"mcp__agent-ui__localhost_4100-render_html",
|
||||||
"mcp__agent-ui__localhost_4100-load_vue_component"
|
"mcp__agent-ui__localhost_4100-load_vue_component",
|
||||||
|
"mcp__agent-ui__localhost_4100-page_refresh",
|
||||||
|
"WebFetch(domain:docs.anthropic.com)"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
"enableAllProjectMcpServers": true,
|
"enableAllProjectMcpServers": true,
|
||||||
|
|||||||
2
.gitignore
vendored
2
.gitignore
vendored
@@ -3,3 +3,5 @@ frontend/node_modules/
|
|||||||
.env
|
.env
|
||||||
*.log
|
*.log
|
||||||
dist/
|
dist/
|
||||||
|
frontend/dev-dist/
|
||||||
|
nul
|
||||||
|
|||||||
@@ -135,35 +135,59 @@ function initRecognition() {
|
|||||||
|
|
||||||
// ============ WHISPER FUNCTIONS ============
|
// ============ WHISPER FUNCTIONS ============
|
||||||
|
|
||||||
async function checkWhisperStatus() {
|
async function checkWhisperStatus(updateLoading = true) {
|
||||||
try {
|
try {
|
||||||
const res = await fetch(`http://${window.location.hostname}:4100/api/whisper/status`)
|
const res = await fetch(`http://${window.location.hostname}:4100/api/whisper/status`)
|
||||||
const data = await res.json()
|
const data = await res.json()
|
||||||
useWhisper.value = data.enabled
|
useWhisper.value = data.enabled
|
||||||
whisperReady.value = data.running
|
whisperReady.value = data.running
|
||||||
|
if (updateLoading) {
|
||||||
|
whisperLoading.value = data.starting || false
|
||||||
|
}
|
||||||
return data
|
return data
|
||||||
} catch {
|
} catch {
|
||||||
useWhisper.value = false
|
useWhisper.value = false
|
||||||
whisperReady.value = false
|
whisperReady.value = false
|
||||||
|
if (updateLoading) {
|
||||||
|
whisperLoading.value = false
|
||||||
|
}
|
||||||
return null
|
return null
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
async function toggleWhisperMode() {
|
async function toggleWhisperMode() {
|
||||||
|
// Prevent multiple clicks
|
||||||
|
if (whisperLoading.value) {
|
||||||
|
console.log('[Voice] Toggle already in progress, ignoring')
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
whisperLoading.value = true
|
whisperLoading.value = true
|
||||||
error.value = ''
|
error.value = ''
|
||||||
|
|
||||||
|
// Show immediate feedback
|
||||||
|
if (!useWhisper.value) {
|
||||||
|
canvasStore.showNotification('Starting Whisper GPU server...', 'info', 10000)
|
||||||
|
}
|
||||||
|
|
||||||
try {
|
try {
|
||||||
const res = await fetch(`http://${window.location.hostname}:4100/api/whisper/toggle`, {
|
const res = await fetch(`http://${window.location.hostname}:4100/api/whisper/toggle`, {
|
||||||
method: 'POST'
|
method: 'POST'
|
||||||
})
|
})
|
||||||
const data = await res.json()
|
const data = await res.json()
|
||||||
|
|
||||||
|
// Server is starting - poll until ready
|
||||||
|
if (data.starting) {
|
||||||
|
console.log('[Voice] Server starting, polling for status...')
|
||||||
|
await pollWhisperStatus()
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
useWhisper.value = data.enabled
|
useWhisper.value = data.enabled
|
||||||
whisperReady.value = data.running
|
whisperReady.value = data.running
|
||||||
|
|
||||||
if (data.enabled) {
|
if (data.enabled) {
|
||||||
canvasStore.showNotification('Whisper GPU enabled', 'success')
|
canvasStore.showNotification('Whisper GPU ready!', 'success')
|
||||||
connectWhisperSocket()
|
connectWhisperSocket()
|
||||||
} else {
|
} else {
|
||||||
canvasStore.showNotification('Using Web Speech API', 'info')
|
canvasStore.showNotification('Using Web Speech API', 'info')
|
||||||
@@ -171,12 +195,61 @@ async function toggleWhisperMode() {
|
|||||||
}
|
}
|
||||||
} catch (e: any) {
|
} catch (e: any) {
|
||||||
error.value = 'Failed to toggle Whisper'
|
error.value = 'Failed to toggle Whisper'
|
||||||
|
canvasStore.showNotification('Error starting Whisper server', 'error')
|
||||||
console.error('[Voice] Whisper toggle error:', e)
|
console.error('[Voice] Whisper toggle error:', e)
|
||||||
} finally {
|
} finally {
|
||||||
whisperLoading.value = false
|
whisperLoading.value = false
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Poll server status until ready or failed
|
||||||
|
async function pollWhisperStatus() {
|
||||||
|
const maxAttempts = 60 // 2 minutes max
|
||||||
|
let attempts = 0
|
||||||
|
|
||||||
|
while (attempts < maxAttempts) {
|
||||||
|
await new Promise(resolve => setTimeout(resolve, 2000))
|
||||||
|
attempts++
|
||||||
|
|
||||||
|
try {
|
||||||
|
const status = await checkWhisperStatus(false) // Don't update loading state
|
||||||
|
|
||||||
|
if (!status) {
|
||||||
|
console.log('[Voice] Failed to get status')
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
|
||||||
|
// Still starting
|
||||||
|
if (status.starting) {
|
||||||
|
console.log(`[Voice] Still starting... (${attempts * 2}s)`)
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
|
||||||
|
// Started successfully
|
||||||
|
if (status.running && status.enabled) {
|
||||||
|
console.log('[Voice] Server ready!')
|
||||||
|
canvasStore.showNotification('Whisper GPU ready!', 'success')
|
||||||
|
connectWhisperSocket()
|
||||||
|
whisperLoading.value = false
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
// Failed to start
|
||||||
|
console.log('[Voice] Server failed to start')
|
||||||
|
canvasStore.showNotification('Whisper server failed to start', 'error')
|
||||||
|
whisperLoading.value = false
|
||||||
|
return
|
||||||
|
|
||||||
|
} catch (e) {
|
||||||
|
console.error('[Voice] Polling error:', e)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Timeout
|
||||||
|
canvasStore.showNotification('Whisper server timeout', 'error')
|
||||||
|
whisperLoading.value = false
|
||||||
|
}
|
||||||
|
|
||||||
function connectWhisperSocket() {
|
function connectWhisperSocket() {
|
||||||
if (whisperSocket?.readyState === WebSocket.OPEN) return
|
if (whisperSocket?.readyState === WebSocket.OPEN) return
|
||||||
|
|
||||||
@@ -671,8 +744,13 @@ onMounted(async () => {
|
|||||||
document.addEventListener('keyup', handleKeyUp, { capture: true })
|
document.addEventListener('keyup', handleKeyUp, { capture: true })
|
||||||
|
|
||||||
// Check Whisper status on mount
|
// Check Whisper status on mount
|
||||||
await checkWhisperStatus()
|
const status = await checkWhisperStatus()
|
||||||
if (useWhisper.value) {
|
|
||||||
|
// If server is starting (page was reloaded during startup), continue polling
|
||||||
|
if (status?.starting) {
|
||||||
|
console.log('[Voice] Server is starting, resuming polling...')
|
||||||
|
pollWhisperStatus()
|
||||||
|
} else if (useWhisper.value) {
|
||||||
connectWhisperSocket()
|
connectWhisperSocket()
|
||||||
}
|
}
|
||||||
})
|
})
|
||||||
@@ -743,8 +821,9 @@ defineExpose({
|
|||||||
<button
|
<button
|
||||||
class="whisper-toggle"
|
class="whisper-toggle"
|
||||||
:class="{ active: useWhisper, loading: whisperLoading }"
|
:class="{ active: useWhisper, loading: whisperLoading }"
|
||||||
|
:disabled="whisperLoading"
|
||||||
@click.stop="toggleWhisperMode"
|
@click.stop="toggleWhisperMode"
|
||||||
:title="useWhisper ? 'Using Whisper GPU - Click to use Web Speech' : 'Using Web Speech - Click to use Whisper GPU'"
|
:title="whisperLoading ? 'Starting Whisper server...' : (useWhisper ? 'Using Whisper GPU - Click to use Web Speech' : 'Using Web Speech - Click to use Whisper GPU')"
|
||||||
>
|
>
|
||||||
<svg width="10" height="10" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2">
|
<svg width="10" height="10" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2">
|
||||||
<rect x="4" y="4" width="16" height="16" rx="2"/>
|
<rect x="4" y="4" width="16" height="16" rx="2"/>
|
||||||
@@ -917,10 +996,15 @@ defineExpose({
|
|||||||
transition: all 0.15s;
|
transition: all 0.15s;
|
||||||
}
|
}
|
||||||
|
|
||||||
.whisper-toggle:hover {
|
.whisper-toggle:hover:not(:disabled) {
|
||||||
background: rgba(255, 255, 255, 0.5);
|
background: rgba(255, 255, 255, 0.5);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
.whisper-toggle:disabled {
|
||||||
|
cursor: not-allowed;
|
||||||
|
opacity: 0.6;
|
||||||
|
}
|
||||||
|
|
||||||
.whisper-toggle.active {
|
.whisper-toggle.active {
|
||||||
background: linear-gradient(180deg, #10b981 0%, #059669 100%);
|
background: linear-gradient(180deg, #10b981 0%, #059669 100%);
|
||||||
border-color: #047857;
|
border-color: #047857;
|
||||||
|
|||||||
@@ -12,6 +12,7 @@ const WHISPER_SCRIPT = join(import.meta.dir, '..', 'whisper_server.py')
|
|||||||
interface WhisperState {
|
interface WhisperState {
|
||||||
enabled: boolean
|
enabled: boolean
|
||||||
running: boolean
|
running: boolean
|
||||||
|
starting: boolean // Prevents multiple simultaneous start attempts
|
||||||
process: Subprocess | null
|
process: Subprocess | null
|
||||||
model: string
|
model: string
|
||||||
device: string
|
device: string
|
||||||
@@ -20,8 +21,9 @@ interface WhisperState {
|
|||||||
const state: WhisperState = {
|
const state: WhisperState = {
|
||||||
enabled: false,
|
enabled: false,
|
||||||
running: false,
|
running: false,
|
||||||
|
starting: false,
|
||||||
process: null,
|
process: null,
|
||||||
model: 'medium',
|
model: 'large-v3',
|
||||||
device: 'cuda'
|
device: 'cuda'
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -46,89 +48,104 @@ async function killProcessOnPort(port: number): Promise<void> {
|
|||||||
* Start the Whisper Python server
|
* Start the Whisper Python server
|
||||||
*/
|
*/
|
||||||
export async function startWhisperServer(): Promise<boolean> {
|
export async function startWhisperServer(): Promise<boolean> {
|
||||||
|
// Prevent multiple simultaneous start attempts
|
||||||
|
if (state.starting) {
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
|
||||||
if (state.running && state.process) {
|
if (state.running && state.process) {
|
||||||
console.log('[Whisper] Server already running')
|
|
||||||
return true
|
return true
|
||||||
}
|
}
|
||||||
|
|
||||||
console.log('[Whisper] ====== STARTING (v3) ======')
|
state.starting = true
|
||||||
console.log('[Whisper] Script:', WHISPER_SCRIPT)
|
console.log(`[Whisper] Starting (${state.model})...`)
|
||||||
|
|
||||||
// Kill any existing process on the port
|
// Kill any existing process on the port
|
||||||
console.log('[Whisper] Cleaning up port', WHISPER_PORT)
|
|
||||||
await killProcessOnPort(WHISPER_PORT)
|
await killProcessOnPort(WHISPER_PORT)
|
||||||
|
|
||||||
try {
|
try {
|
||||||
// Use Bun.spawn with inherit to show logs directly in console
|
// Use Bun.spawn with inherit to show logs directly in console
|
||||||
const proc = Bun.spawn(['python', WHISPER_SCRIPT], {
|
// -u flag disables Python output buffering for real-time logs
|
||||||
|
const proc = Bun.spawn(['python', '-u', WHISPER_SCRIPT], {
|
||||||
cwd: join(import.meta.dir, '..'),
|
cwd: join(import.meta.dir, '..'),
|
||||||
stdout: 'inherit',
|
stdout: 'inherit',
|
||||||
stderr: 'inherit',
|
stderr: 'inherit',
|
||||||
env: { ...process.env }
|
env: { ...process.env, PYTHONUNBUFFERED: '1' }
|
||||||
})
|
})
|
||||||
|
|
||||||
state.process = proc
|
state.process = proc
|
||||||
|
|
||||||
// Wait a bit for the server to start, then check if port is listening
|
// Wait a bit for the server to start
|
||||||
await new Promise(resolve => setTimeout(resolve, 3000))
|
await new Promise(resolve => setTimeout(resolve, 2000))
|
||||||
|
|
||||||
// Check if process is still running
|
// Check if process is still running
|
||||||
if (proc.exitCode !== null) {
|
if (proc.exitCode !== null) {
|
||||||
console.error('[Whisper] Process exited with code:', proc.exitCode)
|
console.error('[Whisper] Process exited with code:', proc.exitCode)
|
||||||
state.process = null
|
state.process = null
|
||||||
|
state.starting = false
|
||||||
return false
|
return false
|
||||||
}
|
}
|
||||||
|
|
||||||
// Check if port is listening (simple TCP check)
|
// Check if WebSocket is ready
|
||||||
const isListening = await checkPort(WHISPER_PORT)
|
const isListening = await checkPort(WHISPER_PORT)
|
||||||
|
|
||||||
if (isListening) {
|
if (isListening) {
|
||||||
console.log('[Whisper] Server started successfully on port', WHISPER_PORT)
|
console.log('[Whisper] Ready')
|
||||||
state.running = true
|
state.running = true
|
||||||
state.enabled = true
|
state.enabled = true
|
||||||
|
state.starting = false
|
||||||
return true
|
return true
|
||||||
}
|
}
|
||||||
|
|
||||||
// Wait more if model is still loading (up to 90 seconds total)
|
// Wait more if model is still loading (up to 120 seconds total for large models)
|
||||||
console.log('[Whisper] Waiting for model to load...')
|
for (let i = 0; i < 40; i++) {
|
||||||
for (let i = 0; i < 30; i++) {
|
|
||||||
await new Promise(resolve => setTimeout(resolve, 3000))
|
await new Promise(resolve => setTimeout(resolve, 3000))
|
||||||
|
|
||||||
if (proc.exitCode !== null) {
|
if (proc.exitCode !== null) {
|
||||||
console.error('[Whisper] Process died while loading')
|
console.error('[Whisper] Process died')
|
||||||
state.process = null
|
state.process = null
|
||||||
|
state.starting = false
|
||||||
return false
|
return false
|
||||||
}
|
}
|
||||||
|
|
||||||
if (await checkPort(WHISPER_PORT)) {
|
const ready = await checkPort(WHISPER_PORT)
|
||||||
console.log('[Whisper] Server ready!')
|
if (ready) {
|
||||||
|
console.log('[Whisper] Ready')
|
||||||
state.running = true
|
state.running = true
|
||||||
state.enabled = true
|
state.enabled = true
|
||||||
|
state.starting = false
|
||||||
return true
|
return true
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
console.log('[Whisper] Timeout waiting for server')
|
console.error('[Whisper] Timeout (120s)')
|
||||||
|
state.starting = false
|
||||||
return false
|
return false
|
||||||
|
|
||||||
} catch (err: any) {
|
} catch (err: any) {
|
||||||
console.error('[Whisper] Failed to start:', err.message)
|
console.error('[Whisper] Error:', err.message)
|
||||||
state.process = null
|
state.process = null
|
||||||
|
state.starting = false
|
||||||
return false
|
return false
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Check if a port is listening using PowerShell
|
* Check if Whisper WebSocket is ready using PowerShell
|
||||||
*/
|
*/
|
||||||
async function checkPort(port: number): Promise<boolean> {
|
async function checkPort(port: number): Promise<boolean> {
|
||||||
try {
|
try {
|
||||||
const proc = Bun.spawn(['powershell', '-Command',
|
const proc = Bun.spawn(['powershell', '-NoProfile', '-Command',
|
||||||
`if (Get-NetTCPConnection -LocalPort ${port} -State Listen -ErrorAction SilentlyContinue) { exit 0 } else { exit 1 }`
|
`$c = Get-NetTCPConnection -LocalPort ${port} -State Listen -ErrorAction SilentlyContinue; if ($c) { Write-Output 'LISTENING' }`
|
||||||
], { stdout: 'ignore', stderr: 'ignore' })
|
], {
|
||||||
|
stdout: 'pipe',
|
||||||
|
stderr: 'ignore'
|
||||||
|
})
|
||||||
|
|
||||||
const exitCode = await proc.exited
|
const output = await new Response(proc.stdout).text()
|
||||||
return exitCode === 0
|
await proc.exited
|
||||||
|
|
||||||
|
return output.trim() === 'LISTENING'
|
||||||
} catch {
|
} catch {
|
||||||
return false
|
return false
|
||||||
}
|
}
|
||||||
@@ -139,35 +156,43 @@ async function checkPort(port: number): Promise<boolean> {
|
|||||||
*/
|
*/
|
||||||
export function stopWhisperServer(): boolean {
|
export function stopWhisperServer(): boolean {
|
||||||
if (!state.process) {
|
if (!state.process) {
|
||||||
console.log('[Whisper] No server running')
|
|
||||||
return true
|
return true
|
||||||
}
|
}
|
||||||
|
|
||||||
console.log('[Whisper] Stopping server...')
|
|
||||||
|
|
||||||
try {
|
try {
|
||||||
state.process.kill()
|
state.process.kill()
|
||||||
state.process = null
|
state.process = null
|
||||||
state.running = false
|
state.running = false
|
||||||
state.enabled = false
|
state.enabled = false
|
||||||
console.log('[Whisper] Server stopped')
|
console.log('[Whisper] Stopped')
|
||||||
return true
|
return true
|
||||||
} catch (err) {
|
} catch (err) {
|
||||||
console.error('[Whisper] Error stopping server:', err)
|
console.error('[Whisper] Stop error:', err)
|
||||||
return false
|
return false
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Toggle Whisper server on/off
|
* Toggle Whisper server on/off (async - returns immediately when starting)
|
||||||
*/
|
*/
|
||||||
export async function toggleWhisperServer(): Promise<{ enabled: boolean; success: boolean }> {
|
export async function toggleWhisperServer(): Promise<{ enabled: boolean; success: boolean; starting: boolean }> {
|
||||||
|
// Prevent toggle while starting
|
||||||
|
if (state.starting) {
|
||||||
|
return { enabled: false, success: false, starting: true }
|
||||||
|
}
|
||||||
|
|
||||||
if (state.enabled && state.running) {
|
if (state.enabled && state.running) {
|
||||||
const success = stopWhisperServer()
|
const success = stopWhisperServer()
|
||||||
return { enabled: false, success }
|
return { enabled: false, success, starting: false }
|
||||||
} else {
|
} else {
|
||||||
const success = await startWhisperServer()
|
// Start server in background - don't await
|
||||||
return { enabled: success, success }
|
startWhisperServer().catch(err => {
|
||||||
|
console.error('[Whisper] Start error:', err)
|
||||||
|
state.starting = false
|
||||||
|
})
|
||||||
|
|
||||||
|
// Return immediately - frontend will poll for status
|
||||||
|
return { enabled: false, success: true, starting: true }
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -177,11 +202,13 @@ export async function toggleWhisperServer(): Promise<{ enabled: boolean; success
|
|||||||
export async function getWhisperState(): Promise<{
|
export async function getWhisperState(): Promise<{
|
||||||
enabled: boolean
|
enabled: boolean
|
||||||
running: boolean
|
running: boolean
|
||||||
|
starting: boolean
|
||||||
port: number
|
port: number
|
||||||
model: string
|
model: string
|
||||||
device: string
|
device: string
|
||||||
}> {
|
}> {
|
||||||
// Check if port is actually listening
|
// Check if port is actually listening (skip if starting to avoid interference)
|
||||||
|
if (!state.starting) {
|
||||||
const isListening = await checkPort(WHISPER_PORT)
|
const isListening = await checkPort(WHISPER_PORT)
|
||||||
|
|
||||||
// Sync state with reality
|
// Sync state with reality
|
||||||
@@ -193,10 +220,12 @@ export async function getWhisperState(): Promise<{
|
|||||||
state.enabled = false
|
state.enabled = false
|
||||||
state.process = null
|
state.process = null
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
return {
|
return {
|
||||||
enabled: state.enabled,
|
enabled: state.enabled,
|
||||||
running: state.running,
|
running: state.running,
|
||||||
|
starting: state.starting,
|
||||||
port: WHISPER_PORT,
|
port: WHISPER_PORT,
|
||||||
model: state.model,
|
model: state.model,
|
||||||
device: state.device
|
device: state.device
|
||||||
|
|||||||
@@ -79,10 +79,13 @@ def convert_audio_to_wav(input_data: bytes, input_format: str = "webm") -> bytes
|
|||||||
# Configuration
|
# Configuration
|
||||||
HOST = "localhost"
|
HOST = "localhost"
|
||||||
PORT = 4104
|
PORT = 4104
|
||||||
MODEL_SIZE = "large-v3" # tiny, base, small, medium, large-v2, large-v3
|
MODEL_SIZE = "large-v3" # Best standard model for Spanish
|
||||||
DEVICE = "cuda" # cuda or cpu
|
DEVICE = "cuda" # cuda or cpu
|
||||||
COMPUTE_TYPE = "float16" # float16 for GPU, int8 for CPU
|
COMPUTE_TYPE = "float16" # float16 for GPU, int8 for CPU
|
||||||
|
|
||||||
|
# Model display name (extract from path if needed)
|
||||||
|
MODEL_NAME = MODEL_SIZE.split("/")[-1] if "/" in MODEL_SIZE else MODEL_SIZE
|
||||||
|
|
||||||
# Spanish context prompt to improve accuracy (Honduras Spanish + tech context)
|
# Spanish context prompt to improve accuracy (Honduras Spanish + tech context)
|
||||||
INITIAL_PROMPT = """Transcripción en español hondureño de un desarrollador de software.
|
INITIAL_PROMPT = """Transcripción en español hondureño de un desarrollador de software.
|
||||||
Contexto: programación, TypeScript, Vue, Python, comandos de terminal, código.
|
Contexto: programación, TypeScript, Vue, Python, comandos de terminal, código.
|
||||||
@@ -109,7 +112,7 @@ async def load_model():
|
|||||||
return model
|
return model
|
||||||
|
|
||||||
model_loading = True
|
model_loading = True
|
||||||
print(f"[Whisper] Loading model '{MODEL_SIZE}' on {DEVICE}...")
|
print(f"[Whisper] Loading model '{MODEL_NAME}' on {DEVICE}...")
|
||||||
|
|
||||||
try:
|
try:
|
||||||
# Load model - this downloads on first run
|
# Load model - this downloads on first run
|
||||||
@@ -140,15 +143,11 @@ def transcribe_audio(audio_data: bytes, language: str = "es", is_webm: bool = Tr
|
|||||||
if model is None:
|
if model is None:
|
||||||
return {"error": "Model not loaded"}
|
return {"error": "Model not loaded"}
|
||||||
|
|
||||||
print(f"[Whisper] Received {len(audio_data)} bytes of audio data")
|
|
||||||
|
|
||||||
# Convert WebM to WAV if needed
|
# Convert WebM to WAV if needed
|
||||||
if is_webm:
|
if is_webm:
|
||||||
print("[Whisper] Converting WebM to WAV...")
|
|
||||||
wav_data = convert_audio_to_wav(audio_data, "webm")
|
wav_data = convert_audio_to_wav(audio_data, "webm")
|
||||||
if wav_data is None:
|
if wav_data is None:
|
||||||
return {"error": "Failed to convert audio format. Ensure ffmpeg is installed."}
|
return {"error": "Failed to convert audio format. Ensure ffmpeg is installed."}
|
||||||
print(f"[Whisper] Converted to {len(wav_data)} bytes WAV")
|
|
||||||
else:
|
else:
|
||||||
wav_data = audio_data
|
wav_data = audio_data
|
||||||
|
|
||||||
@@ -159,7 +158,6 @@ def transcribe_audio(audio_data: bytes, language: str = "es", is_webm: bool = Tr
|
|||||||
|
|
||||||
try:
|
try:
|
||||||
# Transcribe with optimized parameters
|
# Transcribe with optimized parameters
|
||||||
print(f"[Whisper] Transcribing {temp_path}...")
|
|
||||||
segments, info = model.transcribe(
|
segments, info = model.transcribe(
|
||||||
temp_path,
|
temp_path,
|
||||||
language=language,
|
language=language,
|
||||||
@@ -191,7 +189,6 @@ def transcribe_audio(audio_data: bytes, language: str = "es", is_webm: bool = Tr
|
|||||||
"text": segment.text
|
"text": segment.text
|
||||||
})
|
})
|
||||||
|
|
||||||
print(f"[Whisper] Transcription result: '{text.strip()}'")
|
|
||||||
|
|
||||||
return {
|
return {
|
||||||
"success": True,
|
"success": True,
|
||||||
@@ -201,7 +198,7 @@ def transcribe_audio(audio_data: bytes, language: str = "es", is_webm: bool = Tr
|
|||||||
"duration": info.duration,
|
"duration": info.duration,
|
||||||
"segments": segments_list,
|
"segments": segments_list,
|
||||||
"engine": "whisper-gpu",
|
"engine": "whisper-gpu",
|
||||||
"model": MODEL_SIZE,
|
"model": MODEL_NAME,
|
||||||
"device": DEVICE
|
"device": DEVICE
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -218,7 +215,6 @@ def transcribe_audio(audio_data: bytes, language: str = "es", is_webm: bool = Tr
|
|||||||
|
|
||||||
async def handle_client(websocket):
|
async def handle_client(websocket):
|
||||||
"""Handle WebSocket client connection"""
|
"""Handle WebSocket client connection"""
|
||||||
print(f"[Whisper] Client connected")
|
|
||||||
|
|
||||||
# Ensure model is loaded
|
# Ensure model is loaded
|
||||||
await load_model()
|
await load_model()
|
||||||
@@ -226,7 +222,7 @@ async def handle_client(websocket):
|
|||||||
# Send ready message
|
# Send ready message
|
||||||
await websocket.send(json.dumps({
|
await websocket.send(json.dumps({
|
||||||
"type": "ready",
|
"type": "ready",
|
||||||
"model": MODEL_SIZE,
|
"model": MODEL_NAME,
|
||||||
"device": DEVICE
|
"device": DEVICE
|
||||||
}))
|
}))
|
||||||
|
|
||||||
@@ -234,8 +230,6 @@ async def handle_client(websocket):
|
|||||||
async for message in websocket:
|
async for message in websocket:
|
||||||
if isinstance(message, bytes):
|
if isinstance(message, bytes):
|
||||||
# Binary audio data (likely WebM format from browser)
|
# Binary audio data (likely WebM format from browser)
|
||||||
print(f"[Whisper] Received {len(message)} bytes of binary audio")
|
|
||||||
|
|
||||||
# Transcribe in thread pool to not block
|
# Transcribe in thread pool to not block
|
||||||
loop = asyncio.get_event_loop()
|
loop = asyncio.get_event_loop()
|
||||||
result = await loop.run_in_executor(
|
result = await loop.run_in_executor(
|
||||||
@@ -260,8 +254,6 @@ async def handle_client(websocket):
|
|||||||
language = cmd.get("language", "es")
|
language = cmd.get("language", "es")
|
||||||
is_partial = cmd.get("partial", False)
|
is_partial = cmd.get("partial", False)
|
||||||
|
|
||||||
print(f"[Whisper] Transcribe request: {len(audio_data)} bytes, lang={language}, partial={is_partial}")
|
|
||||||
|
|
||||||
loop = asyncio.get_event_loop()
|
loop = asyncio.get_event_loop()
|
||||||
result = await loop.run_in_executor(
|
result = await loop.run_in_executor(
|
||||||
None,
|
None,
|
||||||
@@ -283,7 +275,7 @@ async def handle_client(websocket):
|
|||||||
elif cmd.get("type") == "status":
|
elif cmd.get("type") == "status":
|
||||||
await websocket.send(json.dumps({
|
await websocket.send(json.dumps({
|
||||||
"type": "status",
|
"type": "status",
|
||||||
"model": MODEL_SIZE,
|
"model": MODEL_NAME,
|
||||||
"device": DEVICE,
|
"device": DEVICE,
|
||||||
"ready": model is not None
|
"ready": model is not None
|
||||||
}))
|
}))
|
||||||
@@ -295,21 +287,19 @@ async def handle_client(websocket):
|
|||||||
}))
|
}))
|
||||||
|
|
||||||
except websockets.exceptions.ConnectionClosed:
|
except websockets.exceptions.ConnectionClosed:
|
||||||
print("[Whisper] Client disconnected")
|
pass
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
print(f"[Whisper] Error: {e}")
|
print(f"[Whisper] Error: {e}")
|
||||||
|
|
||||||
async def main():
|
async def main():
|
||||||
"""Start WebSocket server"""
|
"""Start WebSocket server"""
|
||||||
print(f"[Whisper] Starting server on ws://{HOST}:{PORT}")
|
print(f"[Whisper] Model: {MODEL_NAME} | Device: {DEVICE} | Port: {PORT}")
|
||||||
print(f"[Whisper] Model: {MODEL_SIZE}, Device: {DEVICE}")
|
|
||||||
|
|
||||||
# Pre-load model
|
# Pre-load model
|
||||||
print("[Whisper] Pre-loading model...")
|
|
||||||
await load_model()
|
await load_model()
|
||||||
|
|
||||||
async with websockets.serve(handle_client, HOST, PORT):
|
async with websockets.serve(handle_client, HOST, PORT):
|
||||||
print(f"[Whisper] Server ready! Listening on ws://{HOST}:{PORT}")
|
print(f"[Whisper] Ready")
|
||||||
await asyncio.Future() # Run forever
|
await asyncio.Future() # Run forever
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
|
|||||||
Reference in New Issue
Block a user