- Add faster-whisper Python server for GPU-accelerated transcription
- Support dual mode: Web Speech API or Whisper GPU (toggleable)
- Progressive transcription every 3 seconds while recording
- Separate terminal server process (stable during hot-reload)
- Add Ctrl+V paste and Ctrl+C copy support in FloatingTerminal
- Add MCP tools: whisper_start, whisper_stop, whisper_toggle, whisper_status
- Update package.json with separate api/terminal/frontend processes
219 lines
5.5 KiB
TypeScript
219 lines
5.5 KiB
TypeScript
/**
 * Whisper Service - Manages the Python Whisper server process.
 * Provides GPU-accelerated speech-to-text as an alternative to the Web Speech API.
 */
|
|
|
|
import { join } from 'path'
|
|
import { Subprocess } from 'bun'
|
|
|
|
/** TCP port that the Whisper server is probed on (and that is cleared before start-up). */
const WHISPER_PORT = 4104

/** Absolute path of `whisper_server.py`, located one directory above this module. */
const WHISPER_SCRIPT = join(import.meta.dir, '..', 'whisper_server.py')
|
|
|
|
/** In-memory bookkeeping for the Whisper server managed by this module. */
interface WhisperState {
  /** Set together with `running` whenever the server is confirmed up or taken down. */
  enabled: boolean
  /** True once the Whisper port has been observed in the listening state. */
  running: boolean
  /** Handle of the process spawned by this module, or `null` when none is tracked. */
  process: Subprocess | null
  /** Model name reported to callers of `getWhisperState()`. */
  model: string
  /** Compute device reported to callers of `getWhisperState()`. */
  device: string
}
|
|
|
|
/**
 * Module-wide singleton state. Starts out with no server tracked.
 *
 * NOTE(review): `model` and `device` are only echoed back by `getWhisperState()`;
 * nothing visible in this file passes them to the Python process - confirm that
 * they match the defaults used by `whisper_server.py`.
 */
const state: WhisperState = {
  enabled: false,
  running: false,
  process: null,
  model: 'medium',
  device: 'cuda',
}
|
|
|
|
/**
 * Best-effort cleanup: force-stop every process that owns a TCP connection on
 * the given local port, then pause briefly so the port can be released.
 *
 * The lookup is done through PowerShell (`Get-NetTCPConnection`). Any failure -
 * including PowerShell not being available - is deliberately swallowed, so this
 * function never rejects.
 *
 * @param port Local TCP port to free.
 */
async function killProcessOnPort(port: number): Promise<void> {
  const script = `Get-NetTCPConnection -LocalPort ${port} -ErrorAction SilentlyContinue | ForEach-Object { Stop-Process -Id $_.OwningProcess -Force -ErrorAction SilentlyContinue }`
  const argv = ['powershell', '-Command', script]

  try {
    const killer = Bun.spawn(argv, { stdout: 'ignore', stderr: 'ignore' })
    await killer.exited

    // Give the port one second to be released before the caller reuses it.
    await new Promise(done => setTimeout(done, 1000))
  } catch {
    // Intentionally ignored: cleanup is best-effort.
  }
}
|
|
|
|
/**
 * Start the Whisper Python server and wait until its port is listening.
 *
 * Sequence:
 *  1. Return immediately if a tracked server is already marked as running.
 *  2. Force-free the Whisper port (see `killProcessOnPort`).
 *  3. Spawn `python whisper_server.py` with inherited stdout/stderr.
 *  4. Sleep 3 s, then poll the port every 3 s for up to 30 further attempts
 *     (about 93 s of sleeping in total, plus the time the port probes take).
 *
 * On any failure - early exit of the child, timeout, or an exception - the
 * child process is terminated (if it was spawned) and `state.process` is
 * cleared so that no orphaned Python process or stale handle is left behind.
 *
 * @returns `true` when the server is (or already was) listening, `false` otherwise.
 */
export async function startWhisperServer(): Promise<boolean> {
  if (state.running && state.process) {
    console.log('[Whisper] Server already running')
    return true
  }

  console.log('[Whisper] ====== STARTING (v3) ======')
  console.log('[Whisper] Script:', WHISPER_SCRIPT)

  // Kill any existing process on the port
  console.log('[Whisper] Cleaning up port', WHISPER_PORT)
  await killProcessOnPort(WHISPER_PORT)

  const pollIntervalMs = 3000
  const maxLoadPolls = 30
  const pause = (): Promise<void> =>
    new Promise<void>(resolve => setTimeout(resolve, pollIntervalMs))

  // Declared outside the try block so the failure paths can terminate the child.
  let proc: Subprocess | null = null

  try {
    // stdout/stderr are inherited so the Python logs show up directly in this console.
    proc = Bun.spawn(['python', WHISPER_SCRIPT], {
      cwd: join(import.meta.dir, '..'),
      stdout: 'inherit',
      stderr: 'inherit',
      env: { ...process.env }
    })

    state.process = proc

    // Give the server a moment to start before the first probe.
    await pause()

    if (proc.exitCode !== null) {
      console.error('[Whisper] Process exited with code:', proc.exitCode)
      state.process = null
      return false
    }

    if (await checkPort(WHISPER_PORT)) {
      console.log('[Whisper] Server started successfully on port', WHISPER_PORT)
      state.running = true
      state.enabled = true
      return true
    }

    // The port is not open yet: the model is probably still loading.
    console.log('[Whisper] Waiting for model to load...')
    for (let attempt = 0; attempt < maxLoadPolls; attempt++) {
      await pause()

      if (proc.exitCode !== null) {
        console.error('[Whisper] Process died while loading')
        state.process = null
        return false
      }

      if (await checkPort(WHISPER_PORT)) {
        console.log('[Whisper] Server ready!')
        state.running = true
        state.enabled = true
        return true
      }
    }

    console.log('[Whisper] Timeout waiting for server')
    // Do not leave a half-started Python process (and a stale handle) behind.
    try {
      proc.kill()
    } catch {
      // The process may have exited on its own in the meantime.
    }
    state.process = null
    return false
  } catch (err: unknown) {
    const reason = err instanceof Error ? err.message : String(err)
    console.error('[Whisper] Failed to start:', reason)
    try {
      proc?.kill()
    } catch {
      // Nothing more can be done if the child cannot be signalled.
    }
    state.process = null
    return false
  }
}
|
|
|
|
/**
 * Report whether something is listening on the given local TCP port.
 *
 * Runs a PowerShell one-liner (`Get-NetTCPConnection ... -State Listen`) that
 * exits with 0 when a listener is found and 1 otherwise.
 *
 * @param port Local TCP port to probe.
 * @returns `true` only when the probe exits with code 0; `false` on any other
 *          exit code or when the probe cannot be spawned.
 */
async function checkPort(port: number): Promise<boolean> {
  const script = `if (Get-NetTCPConnection -LocalPort ${port} -State Listen -ErrorAction SilentlyContinue) { exit 0 } else { exit 1 }`

  try {
    const probe = Bun.spawn(['powershell', '-Command', script], {
      stdout: 'ignore',
      stderr: 'ignore',
    })
    return (await probe.exited) === 0
  } catch {
    // A probe that cannot run is treated as "not listening".
    return false
  }
}
|
|
|
|
/**
 * Stop the Whisper server.
 *
 * Three situations are handled:
 *  - A tracked child process exists: it is killed and all state is reset.
 *  - No child is tracked but the state says the server is up. This happens
 *    when `getWhisperState()` discovers a listener that this module did not
 *    spawn. The flags are reset and the port is freed in the background, so
 *    that a later toggle can start a fresh server instead of getting stuck.
 *  - Nothing is tracked and nothing is marked as running: no-op.
 *
 * @returns `true` when the server is stopped (or was not running), `false`
 *          when killing the tracked process threw.
 */
export function stopWhisperServer(): boolean {
  const tracked = state.process

  if (!tracked) {
    if (state.running || state.enabled) {
      console.log('[Whisper] Stopping untracked server on port', WHISPER_PORT)
      state.running = false
      state.enabled = false
      // killProcessOnPort swallows its own errors, so this cannot reject.
      void killProcessOnPort(WHISPER_PORT)
      return true
    }

    console.log('[Whisper] No server running')
    return true
  }

  console.log('[Whisper] Stopping server...')

  try {
    tracked.kill()
    state.process = null
    state.running = false
    state.enabled = false
    console.log('[Whisper] Server stopped')
    return true
  } catch (err) {
    // State is left untouched so the caller can retry the stop.
    console.error('[Whisper] Error stopping server:', err)
    return false
  }
}
|
|
|
|
/**
 * Flip the Whisper server: stop it when it is currently enabled and running,
 * otherwise try to start it.
 *
 * @returns `success` tells whether the stop/start operation worked; `enabled`
 *          is `false` after a stop request and mirrors `success` after a
 *          start request.
 */
export async function toggleWhisperServer(): Promise<{ enabled: boolean; success: boolean }> {
  const currentlyOn = state.enabled && state.running

  if (!currentlyOn) {
    const started = await startWhisperServer()
    return { enabled: started, success: started }
  }

  const stopped = stopWhisperServer()
  return { enabled: false, success: stopped }
}
|
|
|
|
/**
 * Return a snapshot of the Whisper state after reconciling it with the real
 * status of the Whisper port.
 *
 * If the port is listening but the state says "not running", the state is
 * switched to running/enabled. If the port is not listening but the state says
 * "running", the state is switched off and the process handle is dropped.
 *
 * @returns The reconciled flags plus the configured port, model and device.
 */
export async function getWhisperState(): Promise<{
  enabled: boolean
  running: boolean
  port: number
  model: string
  device: string
}> {
  const portOpen = await checkPort(WHISPER_PORT)

  // Only act when the recorded state disagrees with what the port probe saw.
  if (portOpen !== state.running) {
    if (portOpen) {
      state.running = true
      state.enabled = true
    } else {
      state.running = false
      state.enabled = false
      state.process = null
    }
  }

  const { enabled, running, model, device } = state
  return { enabled, running, port: WHISPER_PORT, model, device }
}
|
|
|
|
/**
 * Synchronous check based on the recorded state only (no port probe).
 *
 * @returns `true` when the state is both enabled and running.
 */
export function isWhisperEnabled(): boolean {
  const { enabled, running } = state
  return enabled && running
}
|
|
|
|
// Placeholder for a WebSocket server for Whisper (proxying to the Python server
// or handling requests directly).
// NOTE(review): never assigned or read in the visible part of this file -
// confirm whether it is still needed.
let whisperWsServer: any = null
|
|
|
|
/**
 * Expose the TCP port used for the Whisper server.
 *
 * @returns The fixed Whisper port number.
 */
export function getWhisperPort(): number {
  return WHISPER_PORT
}
|