Files
agent-ui/server/routes/voice-transcript.ts
josedario87 65303df96a feat: Samsung lock screen face widget, voice assistant services, PiP mode and gitignore installers
Add Samsung proprietary Face Widget (lock screen/AOD) with terminal status display.
Add voice interaction services (AgentVoiceInteractionService, RecognitionService) for
digital assistant registration. Add PiP mode with voice/expand actions. Add session-state
proxy, voice transcript routes, window controls component. Ignore installers/ directory.
2026-02-23 20:52:11 -06:00

106 lines
3.5 KiB
TypeScript
Raw Blame History

This file contains invisible Unicode characters

This file contains invisible Unicode characters that are indistinguishable to humans but may be processed differently by a computer. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

import { jsonResponse } from '../utils/cors'
import { PORT_TERMINAL } from '../config'
/**
 * HTTP handler for voice transcripts posted by a client.
 *
 * Expects a JSON body of the form `{ text, timestamp?, source? }`. The
 * transcript is logged and then forwarded as keyboard input to the first
 * alive terminal via `sendToFirstTerminal`.
 *
 * @param req Incoming request.
 * @returns `null` when the method is not POST (presumably so the caller can
 *   try other routes — confirm against the router); a 400 JSON response when
 *   the body is not valid JSON or `text` is missing / not a non-empty string;
 *   otherwise a JSON response with `ok: true` that echoes the transcript and
 *   reports whether it reached a terminal (`sentToTerminal`).
 */
export async function handleVoiceTranscript(req: Request): Promise<Response | null> {
  if (req.method !== 'POST') return null
  try {
    // The body is untrusted input: parse as `unknown` and narrow each field
    // by hand instead of asserting a shape the runtime never checks.
    const body: unknown = await req.json()
    const fields = (typeof body === 'object' && body !== null ? body : {}) as {
      text?: unknown
      timestamp?: unknown
      source?: unknown
    }
    const { text, timestamp, source } = fields

    // Only a non-empty string may be typed into a terminal; numbers, objects
    // and similar values are rejected the same way as a missing field.
    if (typeof text !== 'string' || text === '') {
      return jsonResponse({ error: 'Missing "text" field' }, 400)
    }

    // Absent, empty, or non-string metadata falls back to the defaults.
    const ts = typeof timestamp === 'string' && timestamp !== '' ? timestamp : new Date().toISOString()
    const src = typeof source === 'string' && source !== '' ? source : 'android-voice'

    console.log(`\n🎙 [VOICE TRANSCRIPT] ────────────────────────`)
    console.log(` Source: ${src}`)
    console.log(` Time: ${ts}`)
    console.log(` Text: "${text}"`)

    // Find first alive terminal and send the text as input
    const result = await sendToFirstTerminal(text)

    console.log(` Terminal: ${result.terminal || 'none found'}`)
    console.log(` Status: ${result.sent ? 'sent ✓' : result.error || 'no terminal'}`)
    console.log(` ──────────────────────────────────────────────\n`)

    // `ok: true` means the transcript was accepted; delivery status is
    // reported separately through `sentToTerminal`.
    return jsonResponse({
      ok: true,
      received: text,
      timestamp: ts,
      source: src,
      sentToTerminal: result.sent,
      terminal: result.terminal || null,
      ephemeralSessionId: result.ephemeralSessionId || null
    })
  } catch (e: unknown) {
    // Thrown values are not guaranteed to be Error instances.
    const message = e instanceof Error ? e.message : String(e)
    console.error('[voice-transcript] Parse error:', message)
    return jsonResponse({ error: 'Invalid JSON body' }, 400)
  }
}
/**
 * Types `text` followed by Enter into the first alive terminal.
 *
 * Fetches the terminal registry from the local terminal server, picks the
 * first entry whose `alive` flag is truthy, opens a WebSocket to that
 * session and sends two `input` messages: the text, then a carriage return
 * after a short pause.
 *
 * Never rejects: every failure is reported as `{ sent: false, error }`.
 *
 * @param text Text to send as terminal input.
 * @returns `sent: true` with the target session on success; otherwise
 *   `sent: false` with an `error` description (and `terminal` when a target
 *   had already been chosen).
 */
async function sendToFirstTerminal(text: string): Promise<{ sent: boolean; terminal?: string; ephemeralSessionId?: string; error?: string }> {
  type SendResult = { sent: boolean; terminal?: string; ephemeralSessionId?: string; error?: string }

  const WS_TIMEOUT_MS = 5000 // overall budget for connect + send
  const ENTER_DELAY_MS = 80 // pause between the text and the Enter key
  const CLOSE_DELAY_MS = 150 // pause after Enter before closing the socket

  try {
    // Fetch terminal registry to find alive terminals
    const res = await fetch(`http://localhost:${PORT_TERMINAL}/terminal-registry`)
    if (!res.ok) {
      return { sent: false, error: `registry fetch failed: ${res.status}` }
    }

    const registry: unknown = await res.json()
    if (!Array.isArray(registry)) {
      return { sent: false, error: 'registry response is not an array' }
    }
    const entries = registry as Array<{
      ephemeralSessionId: string
      agent: string
      label: string
      alive: boolean
    }>

    // Find first alive terminal
    const target = entries.find((entry) => entry.alive)
    if (!target) {
      return { sent: false, error: 'no alive terminals' }
    }

    const sessionId = target.ephemeralSessionId
    // Encode the id so characters such as `&` or `#` cannot alter the query.
    const wsUrl = `ws://localhost:${PORT_TERMINAL}/ws/terminal?session=${encodeURIComponent(sessionId)}`

    // Awaited inside the try so a synchronous throw from the WebSocket
    // constructor lands in the catch below instead of rejecting the caller.
    return await new Promise<SendResult>((resolve) => {
      const ws = new WebSocket(wsUrl)
      const timers: Array<ReturnType<typeof setTimeout>> = []
      let settled = false

      // Single exit point: cancel pending timers, release the socket and
      // resolve exactly once, whichever event arrives first.
      const settle = (result: SendResult): void => {
        if (settled) return
        settled = true
        for (const timer of timers) clearTimeout(timer)
        try {
          ws.close()
        } catch {
          // Socket is already closed or was never usable; nothing to release.
        }
        resolve(result)
      }

      const fail = (error: string): void => {
        settle({ sent: false, terminal: sessionId, error })
      }

      // Sends one input message; converts a throwing send into a failure so
      // it cannot escape from a timer callback as an uncaught exception.
      const trySend = (data: string): boolean => {
        try {
          ws.send(JSON.stringify({ type: 'input', data }))
          return true
        } catch (e: unknown) {
          fail(e instanceof Error ? e.message : String(e))
          return false
        }
      }

      timers.push(setTimeout(() => fail('ws timeout'), WS_TIMEOUT_MS))

      ws.onopen = () => {
        // Send the transcribed text
        if (!trySend(text)) return
        // Send Enter after a short delay
        timers.push(setTimeout(() => {
          if (settled || !trySend('\r')) return
          // Close after giving the message a moment to go out
          timers.push(setTimeout(() => {
            settle({
              sent: true,
              terminal: `${sessionId} (${target.agent})`,
              ephemeralSessionId: sessionId
            })
          }, CLOSE_DELAY_MS))
        }, ENTER_DELAY_MS))
      }

      ws.onerror = () => fail('ws connection error')

      // A close before the sequence finished means the input may not have
      // been delivered. After `settle` has run this is a no-op.
      ws.onclose = () => fail('ws closed before input was sent')
    })
  } catch (e: unknown) {
    return { sent: false, error: e instanceof Error ? e.message : String(e) }
  }
}