feat: Add transcript engine API and connect ConversationHistory to real data

- Add transcript-engine service that parses Claude Code JSONL transcripts with session listing, message extraction, token/stats analysis, and caching
- Add transcript REST routes (sessions list, latest, by session ID, section filtering)
- Rewrite ConversationHistory to fetch from /api/transcript/* instead of mock data
- Add session pills for switching between conversation sessions
- Add stats bar footer with model, duration, tokens, and tool count
- Add TranscriptSession/TranscriptMessage types, ChatInput, InputSettings, PromptBar updates, TranscriptCard, and useVoiceCapture composable
2026-02-15 20:05:27 -06:00
parent 68edc01d44
commit f3ac7986ec
10 changed files with 2246 additions and 97 deletions
--- a/server/services/transcript-engine.ts
+++ b/server/services/transcript-engine.ts
@@ -0,0 +1,574 @@
+// Transcript Engine - Parses Claude Code JSONL transcripts
+// Module-level state pattern (like terminal.ts, torch-handler.ts)
+
+import { existsSync, readFileSync, statSync, readdirSync } from 'fs'
+import { join } from 'path'
+import { homedir } from 'os'
+import { WORKING_DIR } from '../config'
+
+// ── Types ──
+
+/**
+ * Complete analysis of one transcript file, as produced by `buildAnalysis`.
+ */
+export interface TranscriptAnalysis {
+  /** Session id: the id derived from the file name, or the first `sessionId` found in the records when that is empty. */
+  sessionId: string
+  /** `message.model` of the first assistant record that carries one ('' if none). */
+  model: string
+  /** First non-empty `version` value found in the records. */
+  version: string
+  /** First non-empty `gitBranch` value found in the records. */
+  gitBranch: string
+  /** First non-empty `cwd` value found in the records. */
+  cwd: string
+  /** Smallest `timestamp` string seen across all records ('' if none). */
+  startTime: string
+  /** Largest `timestamp` string seen across all records ('' if none). */
+  endTime: string
+  /** `endTime - startTime` in milliseconds; 0 when either bound is missing. */
+  duration: number
+  /** User and assistant messages, sorted by `timestamp`. */
+  messages: TranscriptMessage[]
+  /** Token usage accumulated over assistant messages that reported `usage`. */
+  tokens: {
+    /** Sum of `usage.input_tokens`. */
+    totalInput: number
+    /** Sum of `usage.output_tokens`. */
+    totalOutput: number
+    /** Sum of `usage.cache_read_input_tokens`. */
+    totalCacheRead: number
+    /** Sum of `usage.cache_creation_input_tokens`. */
+    totalCacheCreation: number
+    /** One entry per assistant message that reported `usage`. */
+    byTurn: TurnTokens[]
+  }
+  /** Tool usage extracted from assistant `tool_use` blocks. */
+  tools: {
+    /** Number of calls per tool name. */
+    summary: Record<string, number>
+    /** Every tool call, paired with its result when one was found. */
+    calls: ToolCall[]
+  }
+  /** Paths taken from file-history snapshots and from Edit/Write/NotebookEdit tool inputs (deduplicated). */
+  filesModified: string[]
+  /** One entry per distinct `agentId` seen in `agent_progress` records. */
+  subagents: SubagentInfo[]
+  /** Text of `summary` records, each truncated to 1000 characters. */
+  summaries: string[]
+  /** Aggregate counters. */
+  stats: {
+    /** Length of `messages`. */
+    messageCount: number
+    /** Number of entries in `messages` with role 'user'. */
+    userMessageCount: number
+    /** Number of entries in `messages` with role 'assistant'. */
+    assistantMessageCount: number
+    /** Length of `tools.calls`. */
+    toolCallCount: number
+    /** Number of `thinking` content blocks encountered. */
+    thinkingBlocks: number
+    /** Number of `tool_result` blocks flagged `is_error`. */
+    errors: number
+  }
+}
+
+/**
+ * A single user or assistant message extracted from a transcript.
+ */
+export interface TranscriptMessage {
+  /** `uuid` of the transcript record ('' if absent); for assistant messages, the first record of the merged chunk. */
+  uuid: string
+  role: 'user' | 'assistant'
+  /** Message text. Assistant messages without text get a `[Tool calls: ...]` placeholder instead. */
+  content: string
+  /** `timestamp` of the transcript record ('' if absent). */
+  timestamp: string
+  /** Copied from the record's `isMeta` flag for user messages; always false for assistant messages. */
+  isMeta: boolean
+  /** Input/output token counts from `usage`; only set on assistant messages that reported usage. */
+  tokens?: { input: number; output: number }
+  /** Names of the tools invoked by this assistant message; omitted when there are none. */
+  toolCalls?: string[]
+  /** True when the assistant message contained at least one `thinking` block. */
+  hasThinking: boolean
+}
+
+/**
+ * One tool invocation made by the assistant, with its result when available.
+ */
+export interface ToolCall {
+  /** Tool name from the `tool_use` block. */
+  name: string
+  /** Declared `unknown`; `buildAnalysis` stores the input as a JSON string truncated to 500 characters. */
+  input: unknown
+  /** Result text truncated to 300 characters; undefined when no matching `tool_result` was found. */
+  output?: string
+  /** `timestamp` of the record that contained the `tool_use` block ('' if absent). */
+  timestamp: string
+  /** True when the matching `tool_result` was flagged `is_error`. */
+  isError: boolean
+}
+
+/**
+ * Token usage of one assistant message (one "turn").
+ */
+export interface TurnTokens {
+  /** 0-based running index over assistant messages that reported `usage`. */
+  turnIndex: number
+  /** `usage.input_tokens` (0 if absent). */
+  input: number
+  /** `usage.output_tokens` (0 if absent). */
+  output: number
+  /** `usage.cache_read_input_tokens` (0 if absent). */
+  cacheRead: number
+  /** `usage.cache_creation_input_tokens` (0 if absent). */
+  cacheCreation: number
+  /** `message.model` of the assistant message ('' if absent). */
+  model: string
+}
+
+/**
+ * A subagent discovered through `agent_progress` records.
+ */
+export interface SubagentInfo {
+  /** Agent id from the progress record. */
+  agentId: string
+  /** Agent prompt, truncated to 200 characters ('' if absent). */
+  prompt: string
+  /** `timestamp` of the first progress record seen for this agent ('' if absent). */
+  timestamp: string
+}
+
+/**
+ * Lightweight session descriptor returned by `listSessions`.
+ */
+export interface SessionInfo {
+  /** Transcript file name with `.jsonl` removed. */
+  id: string
+  /** Start time from the cached analysis, otherwise the first `timestamp` found in the first 20 lines ('' if none). */
+  startTime: string
+  /**
+   * Message count from the cached analysis when one is available; otherwise the
+   * number of non-blank lines in the file, so the two cases are not directly comparable.
+   */
+  messageCount: number
+  /** Model from the cached analysis, otherwise from the first assistant record in the first 20 lines ('' if none). */
+  model: string
+}
+
+// ── Module-level cache ──
+
+/** A parsed analysis together with the file mtime (ms) it was computed from. */
+type CachedTranscript = {
+  analysis: TranscriptAnalysis
+  lastModified: number
+}
+
+// Keyed by session id; an entry is only reused while the file's mtime still matches.
+const cache: Map<string, CachedTranscript> = new Map()
+
+// ── Project hash ──
+
+/**
+ * Derives the per-project folder name from WORKING_DIR by replacing every
+ * path separator (`\` or `/`) and every colon with a dash.
+ *
+ * Example: C:\Users\jodar\agent-ui → C--Users-jodar-agent-ui
+ */
+function getProjectHash(): string {
+  return WORKING_DIR.replace(/[\\/:]/g, '-')
+}
+
+/** Path of this project's transcript folder: <home>/.claude/projects/<project hash>. */
+function getProjectDir(): string {
+  const projectsRoot = join(homedir(), '.claude', 'projects')
+  return join(projectsRoot, getProjectHash())
+}
+
+// ── Path resolution ──
+
+/**
+ * Resolves which transcript file to read.
+ *
+ * - With an explicit `sessionId` (anything other than 'latest'): returns
+ *   `<projectDir>/<sessionId>.jsonl` if that file exists.
+ * - Otherwise: returns the `.jsonl` file in the project directory with the
+ *   newest modification time.
+ *
+ * @param sessionId Session id, 'latest', or undefined.
+ * @returns Path of the transcript file, or null when the project directory or
+ *   the requested file does not exist, the id is not a plain file name, or the
+ *   directory cannot be read.
+ */
+function resolveTranscriptPath(sessionId?: string): string | null {
+  const projectDir = getProjectDir()
+  if (!existsSync(projectDir)) return null
+
+  if (sessionId && sessionId !== 'latest') {
+    // The id is supplied by the caller (presumably a request parameter — confirm
+    // against the routes). Refuse path separators and NUL so that ids such as
+    // "../../other" cannot resolve to a file outside projectDir.
+    if (/[\\/\0]/.test(sessionId)) return null
+
+    const filePath = join(projectDir, `${sessionId}.jsonl`)
+    return existsSync(filePath) ? filePath : null
+  }
+
+  // Find most recent by mtime
+  let newest: { path: string; mtime: number } | null = null
+  try {
+    for (const name of readdirSync(projectDir)) {
+      if (!name.endsWith('.jsonl')) continue
+
+      const fullPath = join(projectDir, name)
+      let mtime: number
+      try {
+        mtime = statSync(fullPath).mtimeMs
+      } catch {
+        // File disappeared between readdir and stat — ignore just this one
+        continue
+      }
+
+      // Strict '>' keeps the first file in directory order when mtimes tie
+      if (!newest || mtime > newest.mtime) newest = { path: fullPath, mtime }
+    }
+  } catch {
+    return null
+  }
+
+  return newest ? newest.path : null
+}
+
+/**
+ * Extracts the session id from a transcript path: the last path segment
+ * (split on `/` or `\`) with a trailing `.jsonl` extension removed.
+ *
+ * Only the trailing extension is stripped, so a name such as
+ * `a.jsonl.old.jsonl` yields `a.jsonl.old` and still round-trips to the same
+ * file when `.jsonl` is appended again.
+ *
+ * @param filePath Transcript path or bare file name.
+ * @returns The session id; '' for an empty path.
+ */
+function sessionIdFromPath(filePath: string): string {
+  const extension = '.jsonl'
+  const basename = filePath.split(/[\\/]/).pop() || ''
+  return basename.endsWith(extension)
+    ? basename.slice(0, -extension.length)
+    : basename
+}
+
+// ── Helpers ──
+
+/**
+ * Shortens `str` to at most `maxLen` characters, appending '...' when it was cut.
+ * Empty, null or undefined input yields ''.
+ */
+function truncate(str: string, maxLen: number): string {
+  if (!str) return ''
+  if (str.length <= maxLen) return str
+  return `${str.slice(0, maxLen)}...`
+}
+
+/**
+ * Extracts plain text from a message `content` value.
+ *
+ * - A string is returned with `<...>` tag-like spans removed and trimmed.
+ * - An array is treated as content blocks: the `text` of every block whose
+ *   `type` is 'text' is joined with newlines, then cleaned the same way.
+ *   Entries that are not objects (null, strings, numbers) are ignored rather
+ *   than throwing, so one malformed block cannot abort a whole analysis.
+ * - Anything else yields ''.
+ *
+ * @param content Raw `message.content` from a transcript record.
+ * @returns The cleaned text, possibly ''.
+ */
+function extractText(content: unknown): string {
+  // Removes anything that looks like a tag, then surrounding whitespace
+  const clean = (raw: string): string => raw.replace(/<[^>]+>/g, '').trim()
+
+  if (typeof content === 'string') return clean(content)
+  if (!Array.isArray(content)) return ''
+
+  const parts: string[] = []
+  for (const block of content) {
+    if (block && typeof block === 'object' && block.type === 'text') {
+      parts.push(String(block.text || ''))
+    }
+  }
+  return clean(parts.join('\n'))
+}
+
+// ── JSONL parsing ──
+
+/** One successfully parsed JSONL record together with its top-level `type`. */
+interface ParsedLine {
+  type: string
+  data: any
+}
+
+/**
+ * Reads a transcript file as UTF-8 and parses it line by line as JSON.
+ * Blank lines and lines that fail to parse are skipped.
+ *
+ * @param filePath Path of the `.jsonl` transcript.
+ * @returns The parsed records in file order.
+ * @throws Whatever `readFileSync` throws when the file cannot be read.
+ */
+function parseTranscriptFile(filePath: string): ParsedLine[] {
+  const parsed: ParsedLine[] = []
+
+  readFileSync(filePath, 'utf8')
+    .trim()
+    .split('\n')
+    .forEach(raw => {
+      if (raw.trim() === '') return
+      try {
+        const record = JSON.parse(raw)
+        parsed.push({ type: record.type, data: record })
+      } catch {
+        // Malformed line — ignore it and keep going
+      }
+    })
+
+  return parsed
+}
+
+// ── Build analysis from parsed lines ──
+
+/**
+ * Builds a TranscriptAnalysis from parsed transcript records.
+ *
+ * Pass 1 walks every record once: it collects session metadata and time
+ * bounds, stores tool results by `tool_use_id`, records user messages, merges
+ * assistant records that share a `message.id` into one chunk, and gathers
+ * subagents, file-history snapshots and summaries.
+ * Pass 2 turns each assistant chunk into a message, pairs its tool calls with
+ * their results, records edited file paths, and accumulates token usage.
+ *
+ * @param lines Parsed JSONL records, in file order.
+ * @param fileSessionId Session id derived from the file name. A `sessionId`
+ *   found in the records is only used when this is empty.
+ * @returns The assembled analysis, with `messages` sorted by timestamp.
+ */
+function buildAnalysis(lines: ParsedLine[], fileSessionId: string): TranscriptAnalysis {
+  // Tools whose input names the file they modify
+  const fileEditingTools = ['Edit', 'Write', 'NotebookEdit']
+
+  let sessionId = fileSessionId
+  let model = ''
+  let version = ''
+  let gitBranch = ''
+  let cwd = ''
+  let startTime = ''
+  let endTime = ''
+
+  const messages: TranscriptMessage[] = []
+  const toolCalls: ToolCall[] = []
+  const filesModified = new Set<string>()
+  const subagents: SubagentInfo[] = []
+  const summaries: string[] = []
+  const turnTokens: TurnTokens[] = []
+
+  let totalInput = 0
+  let totalOutput = 0
+  let totalCacheRead = 0
+  let totalCacheCreation = 0
+  let thinkingBlocks = 0
+  let errors = 0
+
+  // Track assistant message chunks by message.id (streaming chunks share the same id)
+  const assistantChunks = new Map<string, {
+    uuid: string
+    timestamp: string
+    model: string
+    textParts: string[]
+    toolNames: string[]
+    hasThinking: boolean
+    usage: any
+    pendingToolCalls: { name: string; input: any; id: string; timestamp: string }[]
+  }>()
+
+  // Track tool results by tool_use_id
+  const toolResults = new Map<string, { content: string; isError: boolean }>()
+
+  // ── First pass: collect all data ──
+  for (const { type, data } of lines) {
+    // Extract metadata from first message that has it
+    if (data.sessionId && !sessionId) sessionId = data.sessionId
+    if (data.version && !version) version = data.version
+    if (data.gitBranch && !gitBranch) gitBranch = data.gitBranch
+    if (data.cwd && !cwd) cwd = data.cwd
+
+    // Track time bounds (timestamps are compared as strings)
+    if (data.timestamp) {
+      if (!startTime || data.timestamp < startTime) startTime = data.timestamp
+      if (!endTime || data.timestamp > endTime) endTime = data.timestamp
+    }
+
+    switch (type) {
+      case 'user': {
+        const msg = data.message
+        if (!msg) break
+
+        // Collect tool results (user messages contain tool_result blocks)
+        if (Array.isArray(msg.content)) {
+          for (const block of msg.content) {
+            if (block?.type === 'tool_result') {
+              const resultText = typeof block.content === 'string'
+                ? block.content
+                : Array.isArray(block.content)
+                  ? block.content.map((c: any) => c?.text || '').join('\n')
+                  : ''
+              toolResults.set(block.tool_use_id, {
+                content: truncate(resultText, 300),
+                isError: !!block.is_error
+              })
+              if (block.is_error) errors++
+            }
+          }
+        }
+
+        // Add as user message. Messages that carry a tool_result, or that have
+        // no text, are skipped; meta messages are kept and flagged via isMeta.
+        const isMeta = !!data.isMeta
+        const text = extractText(msg.content)
+        const hasToolResult = Array.isArray(msg.content) &&
+          msg.content.some((c: any) => c?.type === 'tool_result')
+
+        if (text && !hasToolResult) {
+          messages.push({
+            uuid: data.uuid || '',
+            role: 'user',
+            content: text,
+            timestamp: data.timestamp || '',
+            isMeta,
+            hasThinking: false
+          })
+        }
+        break
+      }
+
+      case 'assistant': {
+        const msg = data.message
+        if (!msg || msg.role !== 'assistant') break
+
+        const msgId = msg.id || data.uuid
+        if (!model && msg.model) model = msg.model
+
+        let chunk = assistantChunks.get(msgId)
+        if (!chunk) {
+          // First record of this message: its uuid/timestamp/model identify the chunk
+          chunk = {
+            uuid: data.uuid || '',
+            timestamp: data.timestamp || '',
+            model: msg.model || '',
+            textParts: [],
+            toolNames: [],
+            hasThinking: false,
+            usage: null,
+            pendingToolCalls: []
+          }
+          assistantChunks.set(msgId, chunk)
+        }
+
+        // Take latest usage (streaming chunks repeat usage, last is most accurate)
+        if (msg.usage) chunk.usage = msg.usage
+
+        // Process content blocks (each JSONL line typically has one block)
+        if (Array.isArray(msg.content)) {
+          for (const block of msg.content) {
+            if (!block) continue
+            if (block.type === 'text' && block.text?.trim()) {
+              chunk.textParts.push(block.text)
+            } else if (block.type === 'thinking') {
+              chunk.hasThinking = true
+              thinkingBlocks++
+            } else if (block.type === 'tool_use') {
+              chunk.toolNames.push(block.name)
+              chunk.pendingToolCalls.push({
+                name: block.name,
+                input: block.input,
+                id: block.id,
+                timestamp: data.timestamp || ''
+              })
+            }
+          }
+        }
+        break
+      }
+
+      case 'progress': {
+        // Register each subagent once, on its first progress record
+        if (data.data?.type === 'agent_progress' && data.data.agentId) {
+          const existing = subagents.find(s => s.agentId === data.data.agentId)
+          if (!existing) {
+            subagents.push({
+              agentId: data.data.agentId,
+              prompt: truncate(data.data.prompt || '', 200),
+              timestamp: data.timestamp || ''
+            })
+          }
+        }
+        break
+      }
+
+      case 'file-history-snapshot': {
+        const backups = data.snapshot?.trackedFileBackups
+        if (backups && typeof backups === 'object') {
+          for (const filePath of Object.keys(backups)) {
+            filesModified.add(filePath)
+          }
+        }
+        break
+      }
+
+      case 'summary': {
+        const summaryText = data.summary || data.message?.content
+        if (summaryText) {
+          summaries.push(truncate(
+            typeof summaryText === 'string' ? summaryText : JSON.stringify(summaryText),
+            1000
+          ))
+        }
+        break
+      }
+    }
+  }
+
+  // ── Second pass: assemble assistant messages and finalize tool calls ──
+  let turnIndex = 0
+  for (const [, chunk] of assistantChunks) {
+    const text = chunk.textParts.join('\n').trim()
+
+    if (text || chunk.toolNames.length > 0) {
+      const msgTokens = chunk.usage
+        ? { input: chunk.usage.input_tokens || 0, output: chunk.usage.output_tokens || 0 }
+        : undefined
+
+      messages.push({
+        uuid: chunk.uuid,
+        role: 'assistant',
+        content: text || `[Tool calls: ${chunk.toolNames.join(', ')}]`,
+        timestamp: chunk.timestamp,
+        isMeta: false,
+        tokens: msgTokens,
+        toolCalls: chunk.toolNames.length > 0 ? chunk.toolNames : undefined,
+        hasThinking: chunk.hasThinking
+      })
+    }
+
+    // Finalize tool calls with results
+    for (const tc of chunk.pendingToolCalls) {
+      const result = toolResults.get(tc.id)
+      const inputStr = typeof tc.input === 'string' ? tc.input : JSON.stringify(tc.input)
+
+      // Record edited files from the FULL input. The stored ToolCall.input is
+      // truncated to 500 characters, which is no longer valid JSON for large
+      // Edit/Write payloads, so the path has to be read before truncation.
+      if (fileEditingTools.includes(tc.name) && tc.input) {
+        try {
+          const input = typeof tc.input === 'string' ? JSON.parse(tc.input) : tc.input
+          if (input.file_path) filesModified.add(input.file_path)
+          if (input.notebook_path) filesModified.add(input.notebook_path)
+        } catch { /* input was a string that is not JSON — nothing to extract */ }
+      }
+
+      toolCalls.push({
+        name: tc.name,
+        input: truncate(inputStr, 500),
+        output: result?.content,
+        timestamp: tc.timestamp,
+        isError: result?.isError || false
+      })
+    }
+
+    // Token tracking per turn
+    if (chunk.usage) {
+      const u = chunk.usage
+      const input = u.input_tokens || 0
+      const output = u.output_tokens || 0
+      const cacheRead = u.cache_read_input_tokens || 0
+      const cacheCreation = u.cache_creation_input_tokens || 0
+
+      totalInput += input
+      totalOutput += output
+      totalCacheRead += cacheRead
+      totalCacheCreation += cacheCreation
+
+      turnTokens.push({
+        turnIndex: turnIndex++,
+        input,
+        output,
+        cacheRead,
+        cacheCreation,
+        model: chunk.model
+      })
+    }
+  }
+
+  // Sort messages chronologically
+  messages.sort((a, b) => a.timestamp.localeCompare(b.timestamp))
+
+  // Build tool summary
+  const toolSummary: Record<string, number> = {}
+  for (const tc of toolCalls) {
+    toolSummary[tc.name] = (toolSummary[tc.name] || 0) + 1
+  }
+
+  // Milliseconds between the first and last timestamp; 0 when either is missing
+  const duration = startTime && endTime
+    ? new Date(endTime).getTime() - new Date(startTime).getTime()
+    : 0
+
+  const userMsgCount = messages.filter(m => m.role === 'user').length
+  const assistantMsgCount = messages.filter(m => m.role === 'assistant').length
+
+  return {
+    sessionId,
+    model,
+    version,
+    gitBranch,
+    cwd,
+    startTime,
+    endTime,
+    duration,
+    messages,
+    tokens: {
+      totalInput,
+      totalOutput,
+      totalCacheRead,
+      totalCacheCreation,
+      byTurn: turnTokens
+    },
+    tools: {
+      summary: toolSummary,
+      calls: toolCalls
+    },
+    filesModified: [...filesModified],
+    subagents,
+    summaries,
+    stats: {
+      messageCount: messages.length,
+      userMessageCount: userMsgCount,
+      assistantMessageCount: assistantMsgCount,
+      toolCallCount: toolCalls.length,
+      thinkingBlocks,
+      errors
+    }
+  }
+}
+
+// ── Exported API ──
+
+/**
+ * Returns the analysis for one session, or for the most recently modified
+ * transcript when `sessionId` is omitted or 'latest'.
+ *
+ * Results are cached per session id and reused while the file's mtime is
+ * unchanged; otherwise the file is parsed again and the cache entry replaced.
+ *
+ * @param sessionId Session id, 'latest', or undefined.
+ * @returns The analysis, or null when no transcript resolves or reading or
+ *   parsing fails (the error is logged to the console).
+ */
+export function getTranscriptAnalysis(sessionId?: string): TranscriptAnalysis | null {
+  const transcriptPath = resolveTranscriptPath(sessionId)
+  if (!transcriptPath) return null
+
+  const cacheKey = sessionIdFromPath(transcriptPath)
+
+  try {
+    const { mtimeMs } = statSync(transcriptPath)
+
+    // Serve from cache while the file is untouched
+    const hit = cache.get(cacheKey)
+    if (hit && hit.lastModified === mtimeMs) return hit.analysis
+
+    // Otherwise parse from scratch and remember the result
+    const fresh = buildAnalysis(parseTranscriptFile(transcriptPath), cacheKey)
+    cache.set(cacheKey, { analysis: fresh, lastModified: mtimeMs })
+    return fresh
+  } catch (e) {
+    console.error('[transcript-engine] Error parsing transcript:', e)
+    return null
+  }
+}
+
+/**
+ * Lists the sessions found in the project directory, newest first (by file
+ * modification time).
+ *
+ * For each `.jsonl` file, metadata comes from the analysis cache when the
+ * cached entry matches the file's mtime. Otherwise the file is read and only
+ * its first 20 lines are JSON-parsed to find a start time and model.
+ *
+ * Note that `messageCount` is the real message count for cached sessions but
+ * the number of non-blank lines for uncached ones.
+ *
+ * @returns One SessionInfo per transcript file; [] when the project directory
+ *   does not exist or cannot be read. Files that cannot be stat'ed are left
+ *   out; files that cannot be read are listed with empty metadata.
+ */
+export function listSessions(): SessionInfo[] {
+  const projectDir = getProjectDir()
+  if (!existsSync(projectDir)) return []
+
+  let names: string[]
+  try {
+    names = readdirSync(projectDir).filter(f => f.endsWith('.jsonl'))
+  } catch {
+    return []
+  }
+
+  const files: { name: string; path: string; mtime: number }[] = []
+  for (const name of names) {
+    const fullPath = join(projectDir, name)
+    try {
+      files.push({ name, path: fullPath, mtime: statSync(fullPath).mtimeMs })
+    } catch {
+      // File vanished between readdir and stat — drop it instead of failing the whole list
+    }
+  }
+  files.sort((a, b) => b.mtime - a.mtime)
+
+  return files.map(f => {
+    // Same derivation as getTranscriptAnalysis, so cache keys always agree
+    const sid = sessionIdFromPath(f.name)
+
+    // Try cache first for quick metadata
+    const cached = cache.get(sid)
+    if (cached && cached.lastModified === f.mtime) {
+      return {
+        id: sid,
+        startTime: cached.analysis.startTime,
+        messageCount: cached.analysis.stats.messageCount,
+        model: cached.analysis.model
+      }
+    }
+
+    // Quick scan: the file is read in full, but only the first lines are parsed
+    try {
+      const allLines = readFileSync(f.path, 'utf8').split('\n')
+      const lineCount = allLines.filter(l => l.trim()).length
+      let startTime = ''
+      let model = ''
+
+      for (const line of allLines.slice(0, 20)) {
+        if (startTime && model) break
+        if (!line.trim()) continue
+        try {
+          const obj = JSON.parse(line)
+          if (obj.timestamp && !startTime) startTime = obj.timestamp
+          if (obj.type === 'assistant' && obj.message?.model && !model) {
+            model = obj.message.model
+          }
+        } catch { /* skip */ }
+      }
+
+      return {
+        id: sid,
+        startTime,
+        messageCount: lineCount,
+        model
+      }
+    } catch {
+      return { id: sid, startTime: '', messageCount: 0, model: '' }
+    }
+  })
+}