feat: Add Whisper GPU speech-to-text with progressive transcription

- Add faster-whisper Python server for GPU-accelerated transcription
- Support dual mode: Web Speech API or Whisper GPU (toggleable)
- Progressive transcription every 3 seconds while recording
- Separate terminal server process (stable during hot-reload)
- Add Ctrl+V paste and Ctrl+C copy support in FloatingTerminal
- Add MCP tools: whisper_start, whisper_stop, whisper_toggle, whisper_status
- Update package.json with separate api/terminal/frontend processes
This commit is contained in:
2026-02-13 23:47:52 -06:00
parent e867b7873e
commit 638e6ac8e0
10 changed files with 1009 additions and 31 deletions

233
server/whisper_server.py Normal file
View File

@@ -0,0 +1,233 @@
#!/usr/bin/env python3
"""
Whisper Server - GPU-accelerated speech-to-text using faster-whisper
WebSocket server that receives audio and returns transcriptions
"""
import asyncio
import json
import sys
import io
import wave
import tempfile
import os
from pathlib import Path
try:
import websockets
from faster_whisper import WhisperModel
except ImportError as e:
print(f"Missing dependency: {e}")
print("Run: pip install faster-whisper websockets")
sys.exit(1)
# Configuration
HOST = "localhost"
PORT = 4104
MODEL_SIZE = "medium" # tiny, base, small, medium, large-v2, large-v3
DEVICE = "cuda" # cuda or cpu
COMPUTE_TYPE = "float16" # float16 for GPU, int8 for CPU
# Global model instance
model = None
model_loading = False
async def load_model():
"""Load Whisper model (lazy loading on first request)"""
global model, model_loading
if model is not None:
return model
if model_loading:
# Wait for model to finish loading
while model_loading:
await asyncio.sleep(0.1)
return model
model_loading = True
print(f"[Whisper] Loading model '{MODEL_SIZE}' on {DEVICE}...")
try:
# Load model - this downloads on first run
model = WhisperModel(
MODEL_SIZE,
device=DEVICE,
compute_type=COMPUTE_TYPE,
download_root=str(Path.home() / ".cache" / "whisper")
)
print(f"[Whisper] Model loaded successfully!")
except Exception as e:
print(f"[Whisper] Error loading model: {e}")
print("[Whisper] Falling back to CPU...")
model = WhisperModel(
MODEL_SIZE,
device="cpu",
compute_type="int8",
download_root=str(Path.home() / ".cache" / "whisper")
)
model_loading = False
return model
def transcribe_audio(audio_data: bytes, language: str = "es") -> dict:
"""Transcribe audio data using Whisper"""
global model
if model is None:
return {"error": "Model not loaded"}
# Save audio to temp file (faster-whisper needs a file path)
with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as f:
f.write(audio_data)
temp_path = f.name
try:
# Transcribe
segments, info = model.transcribe(
temp_path,
language=language,
beam_size=5,
vad_filter=True, # Voice activity detection
vad_parameters=dict(
min_silence_duration_ms=500,
speech_pad_ms=400
)
)
# Collect all segments
text = ""
segments_list = []
for segment in segments:
text += segment.text + " "
segments_list.append({
"start": segment.start,
"end": segment.end,
"text": segment.text
})
return {
"success": True,
"text": text.strip(),
"language": info.language,
"language_probability": info.language_probability,
"duration": info.duration,
"segments": segments_list,
"engine": "whisper-gpu",
"model": MODEL_SIZE,
"device": DEVICE
}
except Exception as e:
return {"error": str(e)}
finally:
# Cleanup temp file
try:
os.unlink(temp_path)
except:
pass
async def handle_client(websocket):
"""Handle WebSocket client connection"""
print(f"[Whisper] Client connected")
# Ensure model is loaded
await load_model()
# Send ready message
await websocket.send(json.dumps({
"type": "ready",
"model": MODEL_SIZE,
"device": DEVICE
}))
try:
async for message in websocket:
if isinstance(message, bytes):
# Binary audio data
print(f"[Whisper] Received {len(message)} bytes of audio")
# Transcribe in thread pool to not block
loop = asyncio.get_event_loop()
result = await loop.run_in_executor(
None,
transcribe_audio,
message,
"es" # Default to Spanish
)
await websocket.send(json.dumps({
"type": "transcription",
**result
}))
else:
# JSON command
try:
cmd = json.loads(message)
if cmd.get("type") == "transcribe":
# Audio data sent as base64
import base64
audio_data = base64.b64decode(cmd.get("audio", ""))
language = cmd.get("language", "es")
loop = asyncio.get_event_loop()
result = await loop.run_in_executor(
None,
transcribe_audio,
audio_data,
language
)
await websocket.send(json.dumps({
"type": "transcription",
**result
}))
elif cmd.get("type") == "ping":
await websocket.send(json.dumps({"type": "pong"}))
elif cmd.get("type") == "status":
await websocket.send(json.dumps({
"type": "status",
"model": MODEL_SIZE,
"device": DEVICE,
"ready": model is not None
}))
except json.JSONDecodeError:
await websocket.send(json.dumps({
"type": "error",
"message": "Invalid JSON"
}))
except websockets.exceptions.ConnectionClosed:
print("[Whisper] Client disconnected")
except Exception as e:
print(f"[Whisper] Error: {e}")
async def main():
"""Start WebSocket server"""
print(f"[Whisper] Starting server on ws://{HOST}:{PORT}")
print(f"[Whisper] Model: {MODEL_SIZE}, Device: {DEVICE}")
# Pre-load model
print("[Whisper] Pre-loading model...")
await load_model()
async with websockets.serve(handle_client, HOST, PORT):
print(f"[Whisper] Server ready! Listening on ws://{HOST}:{PORT}")
await asyncio.Future() # Run forever
if __name__ == "__main__":
# Install websockets if needed
try:
import websockets
except ImportError:
import subprocess
subprocess.check_call([sys.executable, "-m", "pip", "install", "websockets"])
import websockets
asyncio.run(main())