feat: Add transcript engine API and connect ConversationHistory to real data
- Add transcript-engine service that parses Claude Code JSONL transcripts with session listing, message extraction, token/stats analysis, and caching - Add transcript REST routes (sessions list, latest, by session ID, section filtering) - Rewrite ConversationHistory to fetch from /api/transcript/* instead of mock data - Add session pills for switching between conversation sessions - Add stats bar footer with model, duration, tokens, and tool count - Add TranscriptSession/TranscriptMessage types, ChatInput, InputSettings, PromptBar updates, TranscriptCard, and useVoiceCapture composable
This commit is contained in:
574
server/services/transcript-engine.ts
Normal file
574
server/services/transcript-engine.ts
Normal file
@@ -0,0 +1,574 @@
|
||||
// Transcript Engine - Parses Claude Code JSONL transcripts
|
||||
// Module-level state pattern (like terminal.ts, torch-handler.ts)
|
||||
|
||||
import { existsSync, readFileSync, statSync, readdirSync } from 'fs'
|
||||
import { join } from 'path'
|
||||
import { homedir } from 'os'
|
||||
import { WORKING_DIR } from '../config'
|
||||
|
||||
// ── Types ──
|
||||
|
||||
/**
 * Full analysis of a single transcript file, as assembled by `buildAnalysis`
 * from the parsed JSONL lines.
 */
export interface TranscriptAnalysis {
  /** Session id; normally the transcript file name without its extension. */
  sessionId: string
  /** Model reported by the first assistant message that carries one ('' if none). */
  model: string
  /** First `version` value found on any line ('' if none). */
  version: string
  /** First `gitBranch` value found on any line ('' if none). */
  gitBranch: string
  /** First `cwd` value found on any line ('' if none). */
  cwd: string
  /** Smallest `timestamp` string seen across all lines ('' if none). */
  startTime: string
  /** Largest `timestamp` string seen across all lines ('' if none). */
  endTime: string
  /** `endTime - startTime` in milliseconds; 0 when either bound is missing. */
  duration: number
  /** User and assistant messages, sorted by timestamp. */
  messages: TranscriptMessage[]
  /** Token usage summed over assistant messages that report `usage`. */
  tokens: {
    totalInput: number
    totalOutput: number
    totalCacheRead: number
    totalCacheCreation: number
    /** One entry per assistant message that reports `usage`. */
    byTurn: TurnTokens[]
  }
  tools: {
    /** Number of calls per tool name. */
    summary: Record<string, number>
    /** Every tool call, paired with its result when one was found. */
    calls: ToolCall[]
  }
  /** Paths taken from file-history snapshots and Edit/Write/NotebookEdit inputs. */
  filesModified: string[]
  /** Distinct subagents seen in `agent_progress` lines. */
  subagents: SubagentInfo[]
  /** Text of `summary` lines, each truncated to 1000 characters. */
  summaries: string[]
  stats: {
    messageCount: number
    userMessageCount: number
    assistantMessageCount: number
    toolCallCount: number
    /** Count of `thinking` content blocks in assistant lines. */
    thinkingBlocks: number
    /** Count of tool results flagged `is_error`. */
    errors: number
  }
}
|
||||
|
||||
/** A single user or assistant message extracted from a transcript. */
export interface TranscriptMessage {
  /** `uuid` of the originating transcript line ('' when the line has none). */
  uuid: string
  role: 'user' | 'assistant'
  /**
   * Extracted text. Assistant messages that only invoke tools carry a
   * `[Tool calls: ...]` placeholder instead.
   */
  content: string
  /** Timestamp of the originating line ('' when the line has none). */
  timestamp: string
  /** Mirrors the line's `isMeta` flag for user messages; always false for assistant messages. */
  isMeta: boolean
  /** Input/output token counts; present only on assistant messages that report `usage`. */
  tokens?: { input: number; output: number }
  /** Names of the tools invoked by this assistant message; omitted when there are none. */
  toolCalls?: string[]
  /** True when the assistant message contained at least one `thinking` block. */
  hasThinking: boolean
}
|
||||
|
||||
/** One tool invocation made by the assistant, paired with its result when available. */
export interface ToolCall {
  /** Tool name as given by the `tool_use` block. */
  name: string
  /** Tool input; `buildAnalysis` stores it as a string truncated to 500 characters. */
  input: unknown
  /** Result text truncated to 300 characters; undefined when no matching result was found. */
  output?: string
  /** Timestamp of the line that contained the `tool_use` block. */
  timestamp: string
  /** True when the matching tool result was flagged `is_error`. */
  isError: boolean
}
|
||||
|
||||
/** Token usage of one assistant message that reported `usage`. */
export interface TurnTokens {
  /** Zero-based position among the assistant messages that reported usage. */
  turnIndex: number
  /** `usage.input_tokens` (0 when absent). */
  input: number
  /** `usage.output_tokens` (0 when absent). */
  output: number
  /** `usage.cache_read_input_tokens` (0 when absent). */
  cacheRead: number
  /** `usage.cache_creation_input_tokens` (0 when absent). */
  cacheCreation: number
  /** Model reported by that assistant message ('' when absent). */
  model: string
}
|
||||
|
||||
/** A subagent discovered through an `agent_progress` line. */
export interface SubagentInfo {
  agentId: string
  /** Prompt given to the subagent, truncated to 200 characters. */
  prompt: string
  /** Timestamp of the first progress line seen for this agent. */
  timestamp: string
}
|
||||
|
||||
/** Lightweight description of one session, as returned by `listSessions`. */
export interface SessionInfo {
  /** Transcript file name without the `.jsonl` extension. */
  id: string
  startTime: string
  /**
   * Message count from a cached analysis when one is available; otherwise the
   * number of non-empty lines in the transcript file.
   */
  messageCount: number
  model: string
}
|
||||
|
||||
// ── Module-level cache ──
|
||||
|
||||
// Parsed analyses keyed by session id. `lastModified` holds the transcript
// file's mtime (ms) at parse time so a changed file can be detected.
const cache: Map<string, { analysis: TranscriptAnalysis; lastModified: number }> = new Map()
|
||||
|
||||
// ── Project hash ──
|
||||
|
||||
/**
 * Turns WORKING_DIR into the directory name used under `~/.claude/projects`
 * by replacing every path separator and colon with a dash.
 *
 * Example: C:\Users\jodar\agent-ui → C--Users-jodar-agent-ui
 *
 * NOTE(review): assumes Claude Code derives its project directory name with
 * exactly this substitution — confirm for paths containing other punctuation.
 */
function getProjectHash(): string {
  const separatorsAndColons = /[\\/:]/g
  return WORKING_DIR.replace(separatorsAndColons, '-')
}
|
||||
|
||||
/** Absolute path of this project's transcript directory: `~/.claude/projects/<hash>`. */
function getProjectDir(): string {
  const projectsRoot = join(homedir(), '.claude', 'projects')
  return join(projectsRoot, getProjectHash())
}
|
||||
|
||||
// ── Path resolution ──
|
||||
|
||||
/**
 * Resolves the transcript file for a session.
 *
 * @param sessionId Session id to look up. When omitted, empty, or `'latest'`,
 *   the most recently modified `.jsonl` file in the project directory is used.
 * @returns Absolute path of the transcript, or null when the project directory
 *   or the requested file does not exist (or the id is not a plain file name).
 */
function resolveTranscriptPath(sessionId?: string): string | null {
  const projectDir = getProjectDir()
  if (!existsSync(projectDir)) return null

  if (sessionId && sessionId !== 'latest') {
    // The id is spliced into a file path and may originate from a request URL.
    // Refuse anything containing a path separator, drive colon or NUL so that
    // ids such as "../../other/file" cannot escape the project directory.
    if (/[\\/:\0]/.test(sessionId)) return null

    const filePath = join(projectDir, `${sessionId}.jsonl`)
    return existsSync(filePath) ? filePath : null
  }

  // Find most recent by mtime
  try {
    let newestPath: string | null = null
    let newestMtime = -Infinity

    for (const name of readdirSync(projectDir)) {
      if (!name.endsWith('.jsonl')) continue

      const fullPath = join(projectDir, name)
      let mtime: number
      try {
        mtime = statSync(fullPath).mtimeMs
      } catch {
        // File vanished between readdir and stat; ignore it rather than
        // giving up on the whole lookup.
        continue
      }

      // Strict comparison keeps the first file on ties, matching a stable
      // descending sort.
      if (mtime > newestMtime) {
        newestMtime = mtime
        newestPath = fullPath
      }
    }

    return newestPath
  } catch {
    return null
  }
}
|
||||
|
||||
/**
 * Derives the session id from a transcript path: the last path segment
 * (either `/` or `\` separated) with a trailing `.jsonl` extension removed.
 *
 * @param filePath Path to a transcript file.
 * @returns The file name without its `.jsonl` suffix; '' for an empty path.
 */
function sessionIdFromPath(filePath: string): string {
  const basename = filePath.split(/[\\/]/).pop() || ''
  // Anchor to the end: a plain string replace would strip the FIRST ".jsonl"
  // occurrence, mangling names such as "a.jsonl.backup.jsonl".
  return basename.replace(/\.jsonl$/, '')
}
|
||||
|
||||
// ── Helpers ──
|
||||
|
||||
/**
 * Shortens `str` to at most `maxLen` characters, appending `...` when it had
 * to be cut. Falsy input yields an empty string.
 */
function truncate(str: string, maxLen: number): string {
  if (!str) return ''
  return str.length <= maxLen ? str : `${str.slice(0, maxLen)}...`
}
|
||||
|
||||
/**
 * Extracts plain text from a message `content` value.
 *
 * - A string is returned with anything of the form `<...>` removed and the
 *   result trimmed.
 * - An array is treated as a list of content blocks: the `text` of every block
 *   whose `type` is `'text'` is joined with newlines, then cleaned the same way.
 * - Anything else yields ''.
 *
 * @param content Raw `message.content` from a transcript line.
 * @returns The cleaned text, possibly empty.
 */
function extractText(content: any): string {
  if (typeof content === 'string') {
    return content.replace(/<[^>]+>/g, '').trim()
  }
  if (!Array.isArray(content)) return ''

  return content
    // Skip null/undefined entries: reading `.type` on them would throw.
    .filter((block: any) => block != null && block.type === 'text')
    .map((block: any) => block.text || '')
    .join('\n')
    .replace(/<[^>]+>/g, '')
    .trim()
}
|
||||
|
||||
// ── JSONL parsing ──
|
||||
|
||||
/** One successfully parsed JSONL record together with its `type` field. */
interface ParsedLine {
  type: string
  data: any
}

/**
 * Reads a JSONL transcript and returns one entry per line that parses as JSON.
 * Blank lines and lines that fail to parse are skipped silently.
 *
 * @param filePath Path of the transcript file (read synchronously as UTF-8).
 * @returns Parsed records in file order.
 */
function parseTranscriptFile(filePath: string): ParsedLine[] {
  const parsed: ParsedLine[] = []
  const text = readFileSync(filePath, 'utf8').trim()

  text.split('\n').forEach(rawLine => {
    if (rawLine.trim() === '') return
    try {
      const record = JSON.parse(rawLine)
      parsed.push({ type: record.type, data: record })
    } catch {
      // Skip unparseable lines
    }
  })

  return parsed
}
|
||||
|
||||
// ── Build analysis from parsed lines ──
|
||||
|
||||
/**
 * Folds the parsed lines of one transcript into a TranscriptAnalysis.
 *
 * Works in two passes: the first walks every line collecting metadata, user
 * messages, tool results, assistant chunks (grouped by message id), subagents,
 * file snapshots and summaries; the second assembles one assistant message per
 * group, pairs tool calls with their results and accumulates token usage.
 *
 * @param lines Parsed JSONL records in file order.
 * @param fileSessionId Session id derived from the file name; a line's
 *   `sessionId` is only used when this is empty.
 * @returns The assembled analysis with messages sorted by timestamp.
 */
function buildAnalysis(lines: ParsedLine[], fileSessionId: string): TranscriptAnalysis {
  // Tools whose input names the file they modify.
  const FILE_EDIT_TOOLS = ['Edit', 'Write', 'NotebookEdit']

  let sessionId = fileSessionId
  let model = ''
  let version = ''
  let gitBranch = ''
  let cwd = ''
  let startTime = ''
  let endTime = ''

  const messages: TranscriptMessage[] = []
  const toolCalls: ToolCall[] = []
  const filesModified = new Set<string>()
  const subagents: SubagentInfo[] = []
  const seenAgentIds = new Set<string>()
  const summaries: string[] = []
  const turnTokens: TurnTokens[] = []

  let totalInput = 0
  let totalOutput = 0
  let totalCacheRead = 0
  let totalCacheCreation = 0
  let thinkingBlocks = 0
  let errors = 0

  // Track assistant message chunks by message.id (streaming chunks share the same id)
  const assistantChunks = new Map<string, {
    uuid: string
    timestamp: string
    model: string
    textParts: string[]
    toolNames: string[]
    hasThinking: boolean
    usage: any
    pendingToolCalls: { name: string; input: any; id: string; timestamp: string }[]
  }>()

  // Track tool results by tool_use_id
  const toolResults = new Map<string, { content: string; isError: boolean }>()

  // Records the file touched by an edit tool. This must read the ORIGINAL
  // input: the copy stored on ToolCall is truncated to 500 characters plus
  // "...", which is no longer valid JSON for any sizeable edit.
  const recordEditedFiles = (rawInput: any): void => {
    if (!rawInput) return
    try {
      const input = typeof rawInput === 'string' ? JSON.parse(rawInput) : rawInput
      if (input.file_path) filesModified.add(input.file_path)
      if (input.notebook_path) filesModified.add(input.notebook_path)
    } catch { /* string input that is not JSON: nothing to record */ }
  }

  // ── First pass: collect all data ──
  for (const { type, data } of lines) {
    // Extract metadata from first message that has it
    if (data.sessionId && !sessionId) sessionId = data.sessionId
    if (data.version && !version) version = data.version
    if (data.gitBranch && !gitBranch) gitBranch = data.gitBranch
    if (data.cwd && !cwd) cwd = data.cwd

    // Track time bounds (plain string comparison of the timestamp values)
    if (data.timestamp) {
      if (!startTime || data.timestamp < startTime) startTime = data.timestamp
      if (!endTime || data.timestamp > endTime) endTime = data.timestamp
    }

    switch (type) {
      case 'user': {
        const msg = data.message
        if (!msg) break

        // Collect tool results (user messages contain tool_result blocks)
        if (Array.isArray(msg.content)) {
          for (const block of msg.content) {
            if (block.type === 'tool_result') {
              const resultText = typeof block.content === 'string'
                ? block.content
                : Array.isArray(block.content)
                  ? block.content.map((c: any) => c.text || '').join('\n')
                  : ''
              toolResults.set(block.tool_use_id, {
                content: truncate(resultText, 300),
                isError: !!block.is_error
              })
              if (block.is_error) errors++
            }
          }
        }

        // Add as user message (skip empty text, skip messages carrying tool results)
        const isMeta = !!data.isMeta
        const text = extractText(msg.content)
        const hasToolResult = Array.isArray(msg.content) &&
          msg.content.some((c: any) => c.type === 'tool_result')

        if (text && !hasToolResult) {
          messages.push({
            uuid: data.uuid || '',
            role: 'user',
            content: text,
            timestamp: data.timestamp || '',
            isMeta,
            hasThinking: false
          })
        }
        break
      }

      case 'assistant': {
        const msg = data.message
        if (!msg || msg.role !== 'assistant') break

        const msgId = msg.id || data.uuid
        if (!model && msg.model) model = msg.model

        let chunk = assistantChunks.get(msgId)
        if (!chunk) {
          chunk = {
            uuid: data.uuid || '',
            timestamp: data.timestamp || '',
            model: msg.model || '',
            textParts: [],
            toolNames: [],
            hasThinking: false,
            usage: null,
            pendingToolCalls: []
          }
          assistantChunks.set(msgId, chunk)
        }

        // Take latest usage (streaming chunks repeat usage, last is most accurate)
        if (msg.usage) chunk.usage = msg.usage

        // Process content blocks (each JSONL line typically has one block)
        if (Array.isArray(msg.content)) {
          for (const block of msg.content) {
            if (block.type === 'text' && block.text?.trim()) {
              chunk.textParts.push(block.text)
            } else if (block.type === 'thinking') {
              chunk.hasThinking = true
              thinkingBlocks++
            } else if (block.type === 'tool_use') {
              chunk.toolNames.push(block.name)
              chunk.pendingToolCalls.push({
                name: block.name,
                input: block.input,
                id: block.id,
                timestamp: data.timestamp || ''
              })
            }
          }
        }
        break
      }

      case 'progress': {
        if (data.data?.type === 'agent_progress' && data.data.agentId) {
          // Only the first progress line per agent is recorded.
          if (!seenAgentIds.has(data.data.agentId)) {
            seenAgentIds.add(data.data.agentId)
            subagents.push({
              agentId: data.data.agentId,
              prompt: truncate(data.data.prompt || '', 200),
              timestamp: data.timestamp || ''
            })
          }
        }
        break
      }

      case 'file-history-snapshot': {
        const backups = data.snapshot?.trackedFileBackups
        if (backups && typeof backups === 'object') {
          for (const filePath of Object.keys(backups)) {
            filesModified.add(filePath)
          }
        }
        break
      }

      case 'summary': {
        const summaryText = data.summary || data.message?.content
        if (summaryText) {
          summaries.push(truncate(
            typeof summaryText === 'string' ? summaryText : JSON.stringify(summaryText),
            1000
          ))
        }
        break
      }
    }
  }

  // ── Second pass: assemble assistant messages and finalize tool calls ──
  let turnIndex = 0
  for (const [, chunk] of assistantChunks) {
    const text = chunk.textParts.join('\n').trim()

    if (text || chunk.toolNames.length > 0) {
      const msgTokens = chunk.usage
        ? { input: chunk.usage.input_tokens || 0, output: chunk.usage.output_tokens || 0 }
        : undefined

      messages.push({
        uuid: chunk.uuid,
        role: 'assistant',
        content: text || `[Tool calls: ${chunk.toolNames.join(', ')}]`,
        timestamp: chunk.timestamp,
        isMeta: false,
        tokens: msgTokens,
        toolCalls: chunk.toolNames.length > 0 ? chunk.toolNames : undefined,
        hasThinking: chunk.hasThinking
      })
    }

    // Finalize tool calls with results
    for (const tc of chunk.pendingToolCalls) {
      const result = toolResults.get(tc.id)
      const inputStr = typeof tc.input === 'string' ? tc.input : JSON.stringify(tc.input)
      toolCalls.push({
        name: tc.name,
        input: truncate(inputStr, 500),
        output: result?.content,
        timestamp: tc.timestamp,
        isError: result?.isError || false
      })

      // Extract files from Edit/Write tool calls while the full input is still at hand
      if (FILE_EDIT_TOOLS.includes(tc.name)) recordEditedFiles(tc.input)
    }

    // Token tracking per turn
    if (chunk.usage) {
      const u = chunk.usage
      const input = u.input_tokens || 0
      const output = u.output_tokens || 0
      const cacheRead = u.cache_read_input_tokens || 0
      const cacheCreation = u.cache_creation_input_tokens || 0

      totalInput += input
      totalOutput += output
      totalCacheRead += cacheRead
      totalCacheCreation += cacheCreation

      turnTokens.push({
        turnIndex: turnIndex++,
        input,
        output,
        cacheRead,
        cacheCreation,
        model: chunk.model
      })
    }
  }

  // Sort messages chronologically
  messages.sort((a, b) => a.timestamp.localeCompare(b.timestamp))

  // Build tool summary
  const toolSummary: Record<string, number> = {}
  for (const tc of toolCalls) {
    toolSummary[tc.name] = (toolSummary[tc.name] || 0) + 1
  }

  const duration = startTime && endTime
    ? new Date(endTime).getTime() - new Date(startTime).getTime()
    : 0

  const userMsgCount = messages.filter(m => m.role === 'user').length
  const assistantMsgCount = messages.filter(m => m.role === 'assistant').length

  return {
    sessionId,
    model,
    version,
    gitBranch,
    cwd,
    startTime,
    endTime,
    duration,
    messages,
    tokens: {
      totalInput,
      totalOutput,
      totalCacheRead,
      totalCacheCreation,
      byTurn: turnTokens
    },
    tools: {
      summary: toolSummary,
      calls: toolCalls
    },
    filesModified: [...filesModified],
    subagents,
    summaries,
    stats: {
      messageCount: messages.length,
      userMessageCount: userMsgCount,
      assistantMessageCount: assistantMsgCount,
      toolCallCount: toolCalls.length,
      thinkingBlocks,
      errors
    }
  }
}
|
||||
|
||||
// ── Exported API ──
|
||||
|
||||
/**
 * Returns the analysis for a session, re-parsing the transcript only when the
 * file's modification time differs from the cached one.
 *
 * @param sessionId Session to analyse; resolution of omitted or `'latest'`
 *   values is delegated to `resolveTranscriptPath`.
 * @returns The analysis, or null when no transcript could be resolved or when
 *   reading/parsing it failed (the error is logged to the console).
 */
export function getTranscriptAnalysis(sessionId?: string): TranscriptAnalysis | null {
  const transcriptPath = resolveTranscriptPath(sessionId)
  if (!transcriptPath) return null

  const cacheKey = sessionIdFromPath(transcriptPath)

  try {
    const { mtimeMs } = statSync(transcriptPath)

    // Serve from cache while the file is unchanged.
    const hit = cache.get(cacheKey)
    if (hit !== undefined && hit.lastModified === mtimeMs) {
      return hit.analysis
    }

    // Otherwise parse from scratch and remember the result.
    const fresh = buildAnalysis(parseTranscriptFile(transcriptPath), cacheKey)
    cache.set(cacheKey, { analysis: fresh, lastModified: mtimeMs })
    return fresh
  } catch (e) {
    console.error('[transcript-engine] Error parsing transcript:', e)
    return null
  }
}
|
||||
|
||||
/**
 * Lists every transcript in the project directory, most recently modified first.
 *
 * Metadata comes from the analysis cache when the cached entry matches the
 * file's current mtime. Otherwise the file is read and the first 20 lines are
 * scanned for a start timestamp and model.
 *
 * NOTE: in the scan path `messageCount` is the number of non-empty lines in
 * the file, whereas the cached path reports the number of extracted messages,
 * so the two are not directly comparable.
 *
 * @returns One entry per `.jsonl` file; an empty array when the project
 *   directory is missing or cannot be read.
 */
export function listSessions(): SessionInfo[] {
  const EXTENSION = '.jsonl'
  const projectDir = getProjectDir()
  if (!existsSync(projectDir)) return []

  try {
    const files = readdirSync(projectDir)
      .filter(name => name.endsWith(EXTENSION))
      .flatMap(name => {
        const fullPath = join(projectDir, name)
        try {
          return [{ name, path: fullPath, mtime: statSync(fullPath).mtimeMs }]
        } catch {
          // File vanished between readdir and stat: drop just this entry
          // instead of failing the whole listing.
          return []
        }
      })
      .sort((a, b) => b.mtime - a.mtime)

    return files.map(f => {
      // Strip only the trailing extension (every name passed the endsWith filter).
      const sid = f.name.slice(0, -EXTENSION.length)

      // Try cache first for quick metadata
      const cached = cache.get(sid)
      if (cached && cached.lastModified === f.mtime) {
        return {
          id: sid,
          startTime: cached.analysis.startTime,
          messageCount: cached.analysis.stats.messageCount,
          model: cached.analysis.model
        }
      }

      // Quick scan: inspect only the first lines for metadata, no full analysis
      try {
        // Split once and reuse the result for both the count and the scan.
        const allLines = readFileSync(f.path, 'utf8').split('\n')
        const lineCount = allLines.filter(l => l.trim()).length

        let startTime = ''
        let model = ''

        for (const line of allLines.slice(0, 20)) {
          if (startTime && model) break
          if (!line.trim()) continue
          try {
            const obj = JSON.parse(line)
            if (obj.timestamp && !startTime) startTime = obj.timestamp
            if (obj.type === 'assistant' && obj.message?.model && !model) {
              model = obj.message.model
            }
          } catch { /* skip unparseable line */ }
        }

        return {
          id: sid,
          startTime,
          messageCount: lineCount,
          model
        }
      } catch {
        return { id: sid, startTime: '', messageCount: 0, model: '' }
      }
    })
  } catch {
    return []
  }
}
|
||||
Reference in New Issue
Block a user