fix: Improve Whisper server startup with async polling and reduce logs
- Make server startup async to avoid Bun's 10s timeout
- Add frontend polling to detect when server is ready
- Use PowerShell Get-NetTCPConnection for reliable port detection
- Add starting state to prevent multiple simultaneous starts
- Reduce verbose logging, keep only essential info
- Add dev-dist and nul to gitignore
This commit is contained in:
@@ -79,10 +79,13 @@ def convert_audio_to_wav(input_data: bytes, input_format: str = "webm") -> bytes
|
||||
# Configuration
|
||||
HOST = "localhost"
|
||||
PORT = 4104
|
||||
MODEL_SIZE = "large-v3" # tiny, base, small, medium, large-v2, large-v3
|
||||
MODEL_SIZE = "large-v3" # Best standard model for Spanish
|
||||
DEVICE = "cuda" # cuda or cpu
|
||||
COMPUTE_TYPE = "float16" # float16 for GPU, int8 for CPU
|
||||
|
||||
# Model display name (extract from path if needed)
|
||||
MODEL_NAME = MODEL_SIZE.split("/")[-1] if "/" in MODEL_SIZE else MODEL_SIZE
|
||||
|
||||
# Spanish context prompt to improve accuracy (Honduras Spanish + tech context)
|
||||
INITIAL_PROMPT = """Transcripción en español hondureño de un desarrollador de software.
|
||||
Contexto: programación, TypeScript, Vue, Python, comandos de terminal, código.
|
||||
@@ -109,7 +112,7 @@ async def load_model():
|
||||
return model
|
||||
|
||||
model_loading = True
|
||||
print(f"[Whisper] Loading model '{MODEL_SIZE}' on {DEVICE}...")
|
||||
print(f"[Whisper] Loading model '{MODEL_NAME}' on {DEVICE}...")
|
||||
|
||||
try:
|
||||
# Load model - this downloads on first run
|
||||
@@ -140,15 +143,11 @@ def transcribe_audio(audio_data: bytes, language: str = "es", is_webm: bool = Tr
|
||||
if model is None:
|
||||
return {"error": "Model not loaded"}
|
||||
|
||||
print(f"[Whisper] Received {len(audio_data)} bytes of audio data")
|
||||
|
||||
# Convert WebM to WAV if needed
|
||||
if is_webm:
|
||||
print("[Whisper] Converting WebM to WAV...")
|
||||
wav_data = convert_audio_to_wav(audio_data, "webm")
|
||||
if wav_data is None:
|
||||
return {"error": "Failed to convert audio format. Ensure ffmpeg is installed."}
|
||||
print(f"[Whisper] Converted to {len(wav_data)} bytes WAV")
|
||||
else:
|
||||
wav_data = audio_data
|
||||
|
||||
@@ -159,7 +158,6 @@ def transcribe_audio(audio_data: bytes, language: str = "es", is_webm: bool = Tr
|
||||
|
||||
try:
|
||||
# Transcribe with optimized parameters
|
||||
print(f"[Whisper] Transcribing {temp_path}...")
|
||||
segments, info = model.transcribe(
|
||||
temp_path,
|
||||
language=language,
|
||||
@@ -191,7 +189,6 @@ def transcribe_audio(audio_data: bytes, language: str = "es", is_webm: bool = Tr
|
||||
"text": segment.text
|
||||
})
|
||||
|
||||
print(f"[Whisper] Transcription result: '{text.strip()}'")
|
||||
|
||||
return {
|
||||
"success": True,
|
||||
@@ -201,7 +198,7 @@ def transcribe_audio(audio_data: bytes, language: str = "es", is_webm: bool = Tr
|
||||
"duration": info.duration,
|
||||
"segments": segments_list,
|
||||
"engine": "whisper-gpu",
|
||||
"model": MODEL_SIZE,
|
||||
"model": MODEL_NAME,
|
||||
"device": DEVICE
|
||||
}
|
||||
|
||||
@@ -218,7 +215,6 @@ def transcribe_audio(audio_data: bytes, language: str = "es", is_webm: bool = Tr
|
||||
|
||||
async def handle_client(websocket):
|
||||
"""Handle WebSocket client connection"""
|
||||
print(f"[Whisper] Client connected")
|
||||
|
||||
# Ensure model is loaded
|
||||
await load_model()
|
||||
@@ -226,7 +222,7 @@ async def handle_client(websocket):
|
||||
# Send ready message
|
||||
await websocket.send(json.dumps({
|
||||
"type": "ready",
|
||||
"model": MODEL_SIZE,
|
||||
"model": MODEL_NAME,
|
||||
"device": DEVICE
|
||||
}))
|
||||
|
||||
@@ -234,8 +230,6 @@ async def handle_client(websocket):
|
||||
async for message in websocket:
|
||||
if isinstance(message, bytes):
|
||||
# Binary audio data (likely WebM format from browser)
|
||||
print(f"[Whisper] Received {len(message)} bytes of binary audio")
|
||||
|
||||
# Transcribe in thread pool to not block
|
||||
loop = asyncio.get_event_loop()
|
||||
result = await loop.run_in_executor(
|
||||
@@ -260,8 +254,6 @@ async def handle_client(websocket):
|
||||
language = cmd.get("language", "es")
|
||||
is_partial = cmd.get("partial", False)
|
||||
|
||||
print(f"[Whisper] Transcribe request: {len(audio_data)} bytes, lang={language}, partial={is_partial}")
|
||||
|
||||
loop = asyncio.get_event_loop()
|
||||
result = await loop.run_in_executor(
|
||||
None,
|
||||
@@ -283,7 +275,7 @@ async def handle_client(websocket):
|
||||
elif cmd.get("type") == "status":
|
||||
await websocket.send(json.dumps({
|
||||
"type": "status",
|
||||
"model": MODEL_SIZE,
|
||||
"model": MODEL_NAME,
|
||||
"device": DEVICE,
|
||||
"ready": model is not None
|
||||
}))
|
||||
@@ -295,21 +287,19 @@ async def handle_client(websocket):
|
||||
}))
|
||||
|
||||
except websockets.exceptions.ConnectionClosed:
|
||||
print("[Whisper] Client disconnected")
|
||||
pass
|
||||
except Exception as e:
|
||||
print(f"[Whisper] Error: {e}")
|
||||
|
||||
async def main():
|
||||
"""Start WebSocket server"""
|
||||
print(f"[Whisper] Starting server on ws://{HOST}:{PORT}")
|
||||
print(f"[Whisper] Model: {MODEL_SIZE}, Device: {DEVICE}")
|
||||
print(f"[Whisper] Model: {MODEL_NAME} | Device: {DEVICE} | Port: {PORT}")
|
||||
|
||||
# Pre-load model
|
||||
print("[Whisper] Pre-loading model...")
|
||||
await load_model()
|
||||
|
||||
async with websockets.serve(handle_client, HOST, PORT):
|
||||
print(f"[Whisper] Server ready! Listening on ws://{HOST}:{PORT}")
|
||||
print(f"[Whisper] Ready")
|
||||
await asyncio.Future() # Run forever
|
||||
|
||||
if __name__ == "__main__":
|
||||
|
||||
Reference in New Issue
Block a user