// agent-ui/server/services/transcript-engine.ts

// Transcript Engine - Parses Claude Code JSONL transcripts
// Module-level state pattern (like terminal.ts, torch-handler.ts)

import { existsSync, readFileSync, statSync, readdirSync } from 'fs'
import { join } from 'path'
import { homedir } from 'os'
import { WORKING_DIR } from '../config'

// ── Types ──

/**
 * Aggregated view of one transcript file, as assembled by `buildAnalysis`.
 */
export interface TranscriptAnalysis {
  /** Session id; the id derived from the file name wins, a line's `sessionId` is the fallback. */
  sessionId: string
  /** `message.model` of the first assistant line that carries one ('' if none). */
  model: string
  /** First `version` value seen on any line ('' if none). */
  version: string
  /** First `gitBranch` value seen on any line ('' if none). */
  gitBranch: string
  /** First `cwd` value seen on any line ('' if none). */
  cwd: string
  /** Smallest `timestamp` string found across all lines ('' if none). */
  startTime: string
  /** Largest `timestamp` string found across all lines ('' if none). */
  endTime: string
  /** `endTime - startTime` in milliseconds; 0 when either bound is missing. */
  duration: number
  /** User and assistant messages, sorted by timestamp. */
  messages: TranscriptMessage[]
  /** Token usage summed over assistant messages that reported `usage`. */
  tokens: {
    totalInput: number
    totalOutput: number
    totalCacheRead: number
    totalCacheCreation: number
    /** One entry per assistant message that reported `usage`. */
    byTurn: TurnTokens[]
  }
  tools: {
    /** Number of calls per tool name. */
    summary: Record<string, number>
    /** Every tool call, paired with its result when one was found. */
    calls: ToolCall[]
  }
  /** Paths from file-history snapshots plus targets of Edit/Write/NotebookEdit calls (deduplicated). */
  filesModified: string[]
  /** One entry per distinct agent id seen in `agent_progress` lines. */
  subagents: SubagentInfo[]
  /** Summary texts, each truncated to 1000 characters. */
  summaries: string[]
  stats: {
    /** Length of `messages`. */
    messageCount: number
    userMessageCount: number
    assistantMessageCount: number
    /** Length of `tools.calls`. */
    toolCallCount: number
    /** Number of `thinking` content blocks encountered on assistant lines. */
    thinkingBlocks: number
    /** Number of `tool_result` blocks flagged with `is_error`. */
    errors: number
  }
}

/** A single user or assistant message extracted from the transcript. */
export interface TranscriptMessage {
  /** `uuid` of the transcript line ('' if the line had none). */
  uuid: string
  role: 'user' | 'assistant'
  /**
   * User messages: text with `<...>` spans stripped.
   * Assistant messages: joined text blocks, or a `[Tool calls: ...]`
   * placeholder when the message had tool calls but no text.
   */
  content: string
  /** `timestamp` of the line ('' if the line had none). */
  timestamp: string
  /** Copied from the line's `isMeta` flag for user messages; always false for assistant messages. */
  isMeta: boolean
  /** Present on assistant messages that reported `usage`. */
  tokens?: { input: number; output: number }
  /** Names of the tools the assistant message invoked; omitted when there were none. */
  toolCalls?: string[]
  /** True when the assistant message contained at least one `thinking` block. */
  hasThinking: boolean
}

/** One `tool_use` block from an assistant message, paired with its result if found. */
export interface ToolCall {
  /** Tool name as given in the `tool_use` block. */
  name: string
  /** `buildAnalysis` stores the input serialized to a string and truncated to 500 characters. */
  input: unknown
  /** Result text truncated to 300 characters; undefined when no matching `tool_result` was seen. */
  output?: string
  /** Timestamp of the assistant line that issued the call. */
  timestamp: string
  /** True when the matching `tool_result` was flagged with `is_error`. */
  isError: boolean
}

/** Token usage reported by one assistant message. */
export interface TurnTokens {
  /** 0-based counter over assistant messages that reported `usage`. */
  turnIndex: number
  /** `usage.input_tokens` (0 when absent). */
  input: number
  /** `usage.output_tokens` (0 when absent). */
  output: number
  /** `usage.cache_read_input_tokens` (0 when absent). */
  cacheRead: number
  /** `usage.cache_creation_input_tokens` (0 when absent). */
  cacheCreation: number
  /** `message.model` of the first line seen for this message ('' when absent). */
  model: string
}

/** A subagent discovered through an `agent_progress` line. */
export interface SubagentInfo {
  agentId: string
  /** Prompt from the first progress line for this agent, truncated to 200 characters. */
  prompt: string
  /** Timestamp of that first progress line ('' if it had none). */
  timestamp: string
}

/** Lightweight session descriptor returned by `listSessions`. */
export interface SessionInfo {
  /** Session id, i.e. the transcript file name without `.jsonl`. */
  id: string
  /** Start time from the cached analysis, otherwise the first timestamp found in the first 20 lines. */
  startTime: string
  /**
   * Message count from the cached analysis when available; otherwise the
   * number of non-empty lines in the file (an approximation).
   */
  messageCount: number
  /** Model from the cached analysis, otherwise the first assistant model found in the first 20 lines. */
  model: string
}

// ── Module-level cache ──

// Parsed analyses keyed by session id. `lastModified` holds the file's
// mtimeMs at parse time; an entry is only reused while the file's current
// mtime still equals it.
const cache = new Map<string, {
  analysis: TranscriptAnalysis
  lastModified: number
}>()

// ── Project hash ──

/**
 * Turns WORKING_DIR into the flat directory name used for this project by
 * swapping every slash, backslash and colon for a dash.
 *
 * Example: C:\Users\jodar\agent-ui → C--Users-jodar-agent-ui
 */
function getProjectHash(): string {
  const separatorOrColon = /[\\/:]/g
  return WORKING_DIR.replace(separatorOrColon, '-')
}

/**
 * Builds the path of this project's transcript directory:
 * `<home>/.claude/projects/<project hash>`.
 */
function getProjectDir(): string {
  const projectsRoot = join(homedir(), '.claude', 'projects')
  return join(projectsRoot, getProjectHash())
}

// ── Path resolution ──

/**
 * Locates the transcript file for a session inside the project directory.
 *
 * @param sessionId - Session id (transcript file name without `.jsonl`).
 *   When omitted or equal to `'latest'`, the most recently modified
 *   transcript is selected instead.
 * @returns Path of the transcript file, or `null` when the project directory
 *   does not exist, the requested session has no file, the id is not a plain
 *   file name, or no transcript could be found.
 */
function resolveTranscriptPath(sessionId?: string): string | null {
  const projectDir = getProjectDir()
  if (!existsSync(projectDir)) return null

  if (sessionId && sessionId !== 'latest') {
    // A session id names a file directly inside projectDir. An id containing
    // a path separator would let the joined path point outside of it.
    if (/[\\/]/.test(sessionId)) return null

    const filePath = join(projectDir, `${sessionId}.jsonl`)
    return existsSync(filePath) ? filePath : null
  }

  // Find most recent by mtime
  let names: string[]
  try {
    names = readdirSync(projectDir)
  } catch {
    return null
  }

  let newestPath: string | null = null
  let newestMtime = -Infinity
  for (const name of names) {
    if (!name.endsWith('.jsonl')) continue

    const fullPath = join(projectDir, name)
    try {
      const mtime = statSync(fullPath).mtimeMs
      // Strict comparison keeps the first file encountered on equal mtimes.
      if (mtime > newestMtime) {
        newestMtime = mtime
        newestPath = fullPath
      }
    } catch {
      // The file vanished or cannot be stat'ed; the remaining candidates
      // are still valid, so keep scanning instead of giving up.
    }
  }

  return newestPath
}

/**
 * Derives the session id from a transcript path: the last path segment
 * (split on `/` or `\`) with a trailing `.jsonl` extension removed.
 *
 * Only the trailing extension is stripped, so a `.jsonl` occurring earlier
 * in the file name is kept as part of the id.
 *
 * @param filePath - Path or bare file name of a transcript.
 * @returns The session id; the unchanged file name if it has no `.jsonl` suffix.
 */
function sessionIdFromPath(filePath: string): string {
  const extension = '.jsonl'
  const basename = filePath.split(/[\\/]/).pop() ?? ''
  return basename.endsWith(extension)
    ? basename.slice(0, -extension.length)
    : basename
}

// ── Helpers ──

/**
 * Caps a string at `maxLen` characters, appending `...` when it was cut.
 * Falsy input yields an empty string.
 */
function truncate(str: string, maxLen: number): string {
  if (!str) return ''
  if (str.length <= maxLen) return str
  return `${str.slice(0, maxLen)}...`
}

/**
 * Pulls the plain text out of a message `content` value.
 *
 * - string: returned with every `<...>` span removed and trimmed
 * - array: the `text` of all blocks whose `type` is `'text'`, joined by
 *   newlines, then stripped and trimmed the same way
 * - anything else: empty string
 */
function extractText(content: any): string {
  const stripTags = (raw: string): string => raw.replace(/<[^>]+>/g, '').trim()

  if (typeof content === 'string') return stripTags(content)
  if (!Array.isArray(content)) return ''

  const textParts = content.reduce<any[]>((parts, block: any) => {
    if (block.type === 'text') parts.push(block.text || '')
    return parts
  }, [])

  return stripTags(textParts.join('\n'))
}

// ── JSONL parsing ──

/** One successfully parsed transcript line. */
interface ParsedLine {
  /** The `type` property of the parsed JSON value. */
  type: string
  /** The complete parsed JSON value of the line. */
  data: any
}

/**
 * Reads a JSONL transcript and parses it line by line.
 *
 * Blank lines and lines that cannot be parsed (or whose parsed value has no
 * readable `type`, e.g. `null`) are silently dropped.
 *
 * @param filePath - Transcript file to read as UTF-8.
 * @returns The parsed lines in file order.
 */
function parseTranscriptFile(filePath: string): ParsedLine[] {
  const parsed: ParsedLine[] = []
  const text = readFileSync(filePath, 'utf8').trim()

  text.split('\n').forEach(rawLine => {
    if (rawLine.trim() === '') return
    try {
      const record = JSON.parse(rawLine)
      parsed.push({ type: record.type, data: record })
    } catch {
      // Malformed line: ignore it and carry on
    }
  })

  return parsed
}

// ── Build analysis from parsed lines ──

/**
 * Adds the file targeted by a file-editing tool call to `into`.
 *
 * Works on the tool call's raw (untruncated) input: an object is inspected
 * directly, a string is parsed as JSON first. Calls to other tools and
 * inputs without a string `file_path` / `notebook_path` are ignored.
 */
function collectEditedPaths(toolName: string, rawInput: unknown, into: Set<string>): void {
  if (!['Edit', 'Write', 'NotebookEdit'].includes(toolName)) return

  let input: unknown = rawInput
  if (typeof input === 'string') {
    try {
      input = JSON.parse(input)
    } catch {
      return // not JSON, so there is nothing to extract
    }
  }
  if (typeof input !== 'object' || input === null) return

  const { file_path: filePath, notebook_path: notebookPath } =
    input as { file_path?: unknown; notebook_path?: unknown }
  if (typeof filePath === 'string' && filePath) into.add(filePath)
  if (typeof notebookPath === 'string' && notebookPath) into.add(notebookPath)
}

/**
 * Folds parsed transcript lines into a single analysis object.
 *
 * Works in two passes:
 *  1. Walk every line once, collecting metadata, time bounds, user messages,
 *     tool results (keyed by `tool_use_id`), assistant content grouped by
 *     message id, subagents, file snapshots and summaries.
 *  2. Turn each assistant group into one message, pair its tool calls with
 *     the collected results, record edited files and sum token usage.
 *
 * @param lines - Parsed transcript lines in file order.
 * @param fileSessionId - Session id derived from the file name; a line's
 *   `sessionId` is only used when this is empty.
 * @returns The assembled analysis, with messages sorted by timestamp.
 */
function buildAnalysis(lines: ParsedLine[], fileSessionId: string): TranscriptAnalysis {
  let sessionId = fileSessionId
  let model = ''
  let version = ''
  let gitBranch = ''
  let cwd = ''
  let startTime = ''
  let endTime = ''

  const messages: TranscriptMessage[] = []
  const toolCalls: ToolCall[] = []
  const filesModified = new Set<string>()
  const subagents: SubagentInfo[] = []
  const seenAgentIds = new Set<string>()
  const summaries: string[] = []
  const turnTokens: TurnTokens[] = []

  let totalInput = 0
  let totalOutput = 0
  let totalCacheRead = 0
  let totalCacheCreation = 0
  let thinkingBlocks = 0
  let errors = 0

  // Track assistant message chunks by message.id (streaming chunks share the same id)
  const assistantChunks = new Map<string, {
    uuid: string
    timestamp: string
    model: string
    textParts: string[]
    toolNames: string[]
    hasThinking: boolean
    usage: any
    pendingToolCalls: { name: string; input: any; id: string; timestamp: string }[]
  }>()

  // Track tool results by tool_use_id
  const toolResults = new Map<string, { content: string; isError: boolean }>()

  // ── First pass: collect all data ──
  for (const { type, data } of lines) {
    // Extract metadata from the first line that has it
    if (data.sessionId && !sessionId) sessionId = data.sessionId
    if (data.version && !version) version = data.version
    if (data.gitBranch && !gitBranch) gitBranch = data.gitBranch
    if (data.cwd && !cwd) cwd = data.cwd

    // Track time bounds (timestamps are compared as strings)
    if (data.timestamp) {
      if (!startTime || data.timestamp < startTime) startTime = data.timestamp
      if (!endTime || data.timestamp > endTime) endTime = data.timestamp
    }

    switch (type) {
      case 'user': {
        const msg = data.message
        if (!msg) break

        // Collect tool results (user messages contain tool_result blocks)
        if (Array.isArray(msg.content)) {
          for (const block of msg.content) {
            if (block.type === 'tool_result') {
              const resultText = typeof block.content === 'string'
                ? block.content
                : Array.isArray(block.content)
                  ? block.content.map((c: any) => c.text || '').join('\n')
                  : ''
              toolResults.set(block.tool_use_id, {
                content: truncate(resultText, 300),
                isError: !!block.is_error
              })
              if (block.is_error) errors++
            }
          }
        }

        // Add as user message (skip empty and tool-result-carrying messages)
        const isMeta = !!data.isMeta
        const text = extractText(msg.content)
        const hasToolResult = Array.isArray(msg.content) &&
          msg.content.some((c: any) => c.type === 'tool_result')

        if (text && !hasToolResult) {
          messages.push({
            uuid: data.uuid || '',
            role: 'user',
            content: text,
            timestamp: data.timestamp || '',
            isMeta,
            hasThinking: false
          })
        }
        break
      }

      case 'assistant': {
        const msg = data.message
        if (!msg || msg.role !== 'assistant') break

        const msgId = msg.id || data.uuid
        if (!model && msg.model) model = msg.model

        let chunk = assistantChunks.get(msgId)
        if (!chunk) {
          // The first line seen for a message id fixes its uuid, timestamp and model
          chunk = {
            uuid: data.uuid || '',
            timestamp: data.timestamp || '',
            model: msg.model || '',
            textParts: [],
            toolNames: [],
            hasThinking: false,
            usage: null,
            pendingToolCalls: []
          }
          assistantChunks.set(msgId, chunk)
        }

        // Take latest usage (streaming chunks repeat usage, last is most accurate)
        if (msg.usage) chunk.usage = msg.usage

        // Process content blocks (each JSONL line typically has one block)
        if (Array.isArray(msg.content)) {
          for (const block of msg.content) {
            if (block.type === 'text' && block.text?.trim()) {
              chunk.textParts.push(block.text)
            } else if (block.type === 'thinking') {
              chunk.hasThinking = true
              thinkingBlocks++
            } else if (block.type === 'tool_use') {
              chunk.toolNames.push(block.name)
              chunk.pendingToolCalls.push({
                name: block.name,
                input: block.input,
                id: block.id,
                timestamp: data.timestamp || ''
              })
            }
          }
        }
        break
      }

      case 'progress': {
        // Only the first progress line of each agent is recorded
        if (data.data?.type === 'agent_progress' && data.data.agentId) {
          if (!seenAgentIds.has(data.data.agentId)) {
            seenAgentIds.add(data.data.agentId)
            subagents.push({
              agentId: data.data.agentId,
              prompt: truncate(data.data.prompt || '', 200),
              timestamp: data.timestamp || ''
            })
          }
        }
        break
      }

      case 'file-history-snapshot': {
        const backups = data.snapshot?.trackedFileBackups
        if (backups && typeof backups === 'object') {
          for (const filePath of Object.keys(backups)) {
            filesModified.add(filePath)
          }
        }
        break
      }

      case 'summary': {
        const summaryText = data.summary || data.message?.content
        if (summaryText) {
          summaries.push(truncate(
            typeof summaryText === 'string' ? summaryText : JSON.stringify(summaryText),
            1000
          ))
        }
        break
      }
    }
  }

  // ── Second pass: assemble assistant messages and finalize tool calls ──
  let turnIndex = 0
  for (const [, chunk] of assistantChunks) {
    const text = chunk.textParts.join('\n').trim()

    if (text || chunk.toolNames.length > 0) {
      const msgTokens = chunk.usage
        ? { input: chunk.usage.input_tokens || 0, output: chunk.usage.output_tokens || 0 }
        : undefined

      messages.push({
        uuid: chunk.uuid,
        role: 'assistant',
        content: text || `[Tool calls: ${chunk.toolNames.join(', ')}]`,
        timestamp: chunk.timestamp,
        isMeta: false,
        tokens: msgTokens,
        toolCalls: chunk.toolNames.length > 0 ? chunk.toolNames : undefined,
        hasThinking: chunk.hasThinking
      })
    }

    // Finalize tool calls with results
    for (const tc of chunk.pendingToolCalls) {
      const result = toolResults.get(tc.id)
      const inputStr = typeof tc.input === 'string' ? tc.input : JSON.stringify(tc.input)
      toolCalls.push({
        name: tc.name,
        input: truncate(inputStr, 500),
        output: result?.content,
        timestamp: tc.timestamp,
        isError: result?.isError || false
      })

      // Read edited paths from the raw input. The stored input above is
      // truncated to 500 characters, which is no longer valid JSON for
      // larger Edit/Write payloads and would lose the path.
      collectEditedPaths(tc.name, tc.input, filesModified)
    }

    // Token tracking per turn
    if (chunk.usage) {
      const u = chunk.usage
      const input = u.input_tokens || 0
      const output = u.output_tokens || 0
      const cacheRead = u.cache_read_input_tokens || 0
      const cacheCreation = u.cache_creation_input_tokens || 0

      totalInput += input
      totalOutput += output
      totalCacheRead += cacheRead
      totalCacheCreation += cacheCreation

      turnTokens.push({
        turnIndex: turnIndex++,
        input,
        output,
        cacheRead,
        cacheCreation,
        model: chunk.model
      })
    }
  }

  // Sort messages chronologically
  messages.sort((a, b) => a.timestamp.localeCompare(b.timestamp))

  // Build tool summary (number of calls per tool name)
  const toolSummary: Record<string, number> = {}
  for (const tc of toolCalls) {
    toolSummary[tc.name] = (toolSummary[tc.name] || 0) + 1
  }

  const duration = startTime && endTime
    ? new Date(endTime).getTime() - new Date(startTime).getTime()
    : 0

  const userMsgCount = messages.filter(m => m.role === 'user').length
  const assistantMsgCount = messages.filter(m => m.role === 'assistant').length

  return {
    sessionId,
    model,
    version,
    gitBranch,
    cwd,
    startTime,
    endTime,
    duration,
    messages,
    tokens: {
      totalInput,
      totalOutput,
      totalCacheRead,
      totalCacheCreation,
      byTurn: turnTokens
    },
    tools: {
      summary: toolSummary,
      calls: toolCalls
    },
    filesModified: [...filesModified],
    subagents,
    summaries,
    stats: {
      messageCount: messages.length,
      userMessageCount: userMsgCount,
      assistantMessageCount: assistantMsgCount,
      toolCallCount: toolCalls.length,
      thinkingBlocks,
      errors
    }
  }
}

// ── Exported API ──

/**
 * Returns the analysis of a transcript, reusing the cached result while the
 * file's modification time is unchanged.
 *
 * @param sessionId - Session to analyse; omitted or `'latest'` selects the
 *   most recently modified transcript.
 * @returns The analysis, or `null` when no transcript is found or reading /
 *   parsing fails (the error is logged).
 */
export function getTranscriptAnalysis(sessionId?: string): TranscriptAnalysis | null {
  const transcriptPath = resolveTranscriptPath(sessionId)
  if (!transcriptPath) return null

  const cacheKey = sessionIdFromPath(transcriptPath)

  try {
    const { mtimeMs } = statSync(transcriptPath)

    // Cache hit: same session and the file has not been touched since
    const hit = cache.get(cacheKey)
    if (hit !== undefined && hit.lastModified === mtimeMs) {
      return hit.analysis
    }

    // Cache miss or stale entry: parse from scratch and remember the result
    const fresh = buildAnalysis(parseTranscriptFile(transcriptPath), cacheKey)
    cache.set(cacheKey, { analysis: fresh, lastModified: mtimeMs })
    return fresh
  } catch (e) {
    console.error('[transcript-engine] Error parsing transcript:', e)
    return null
  }
}

/**
 * Lists all transcripts of the project, most recently modified first.
 *
 * Metadata comes from the analysis cache when the cached entry matches the
 * file's current mtime. Otherwise the file is scanned cheaply: start time and
 * model are taken from the first 20 lines, and `messageCount` is the number
 * of non-empty lines (an approximation, not the parsed message count).
 *
 * A file that cannot be stat'ed is left out of the list; a file that cannot
 * be read is listed with empty metadata.
 *
 * @returns One entry per transcript, or an empty array when the project
 *   directory is missing or unreadable.
 */
export function listSessions(): SessionInfo[] {
  const projectDir = getProjectDir()
  if (!existsSync(projectDir)) return []

  let names: string[]
  try {
    names = readdirSync(projectDir)
  } catch {
    return []
  }

  const files: { name: string; path: string; mtime: number }[] = []
  for (const name of names) {
    if (!name.endsWith('.jsonl')) continue
    const fullPath = join(projectDir, name)
    try {
      files.push({ name, path: fullPath, mtime: statSync(fullPath).mtimeMs })
    } catch {
      // File vanished between readdir and stat: skip it rather than
      // discarding the whole listing.
    }
  }
  files.sort((a, b) => b.mtime - a.mtime)

  return files.map(f => {
    // Same derivation as the cache key used by getTranscriptAnalysis
    const sid = sessionIdFromPath(f.name)

    // Try cache first for quick metadata
    const cached = cache.get(sid)
    if (cached && cached.lastModified === f.mtime) {
      return {
        id: sid,
        startTime: cached.analysis.startTime,
        messageCount: cached.analysis.stats.messageCount,
        model: cached.analysis.model
      }
    }

    // Quick scan: inspect only the first lines for metadata, no full parse
    try {
      const rows = readFileSync(f.path, 'utf8').split('\n')
      const lineCount = rows.filter(l => l.trim()).length
      let startTime = ''
      let model = ''

      for (const line of rows.slice(0, 20)) {
        if (!line.trim()) continue
        try {
          const obj = JSON.parse(line)
          if (obj.timestamp && !startTime) startTime = obj.timestamp
          if (obj.type === 'assistant' && obj.message?.model && !model) {
            model = obj.message.model
          }
        } catch { /* skip unparseable line */ }
      }

      return {
        id: sid,
        startTime,
        messageCount: lineCount,
        model
      }
    } catch {
      return { id: sid, startTime: '', messageCount: 0, model: '' }
    }
  })
}