context engineer

Published: 4/5/2026

tags: agent, llm, ai, context

research the context design in Claude Code


Context Engineering

This post is based on Claude Code 2.1.87 and takes a deep dive into the high-design-density parts of context handling in the agent harness.

I originally wanted to intersperse short, frequently-referenced code snippets throughout, but there was simply too much material, so I reluctantly gave that up.

Prepared

See src/query.ts: the core query loop is about 1,500 lines of code. From it we can tell that Claude Code does not hand the model the entire history; instead, it reconstructs the context shown to the model on every turn. This leads directly into what follows: the compact mechanism, prompt layering, and context input/output.

After input arrives, the REPL path goes through handlePromptSubmit. The first problem is preventing concurrent turns within the same session: in src/utils/QueryGuard.ts, a Guard (watch-like) queues requests, with state going from idle -> dispatching; if the state is not idle (another query/dispatch is in progress), it returns false.
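The guard idea can be sketched in a few lines (hypothetical names and shapes — the real QueryGuard.ts is richer, with watch-style notification):

```typescript
// Minimal concurrency guard sketch: at most one turn per session at a time.
// Hypothetical simplification of src/utils/QueryGuard.ts.
type GuardState = 'idle' | 'dispatching';

class QueryGuard {
  private state: GuardState = 'idle';
  private pending: Array<() => void> = [];

  /** Returns false if another query/dispatch is already in progress. */
  tryDispatch(): boolean {
    if (this.state !== 'idle') return false;
    this.state = 'dispatching';
    return true;
  }

  /** Release the guard; hand it directly to the next queued turn, if any. */
  release(): void {
    const next = this.pending.shift();
    if (next) {
      next(); // stay in 'dispatching' — ownership transfers, never goes idle
    } else {
      this.state = 'idle';
    }
  }

  /** Queue a turn to run as soon as the guard becomes free (watch-like). */
  enqueue(fn: () => void): void {
    if (this.tryDispatch()) fn();
    else this.pending.push(fn);
  }
}
```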

Next comes the system prefix, layered in order as system prompt, system context, and user context; the system prompt itself is precisely split into a static segment and a dynamic segment:

  • The static segment: intro, system, doing tasks, action, tool…

  • The dynamic segment: session guidance, memory, language, mcp, token budget…

Before the actual request goes out, fetchSystemPromptParts in queryContext.ts assembles defaultSystemPrompt + userContext + systemContext:

export async function fetchSystemPromptParts({
  tools,
  mainLoopModel,
  additionalWorkingDirectories,
  mcpClients,
  customSystemPrompt,
}: {
  tools: Tools
  mainLoopModel: string
  additionalWorkingDirectories: string[]
  mcpClients: MCPServerConnection[]
  customSystemPrompt: string | undefined
}): Promise<{
  defaultSystemPrompt: string[]
  userContext: { [k: string]: string }
  systemContext: { [k: string]: string }
}> {
  const [defaultSystemPrompt, userContext, systemContext] = await Promise.all([
    customSystemPrompt !== undefined
      ? Promise.resolve([])
      : getSystemPrompt(
          tools,
          mainLoopModel,
          additionalWorkingDirectories,
          mcpClients,
        ),
    getUserContext(),
    customSystemPrompt !== undefined ? Promise.resolve({}) : getSystemContext(),
  ])
  return { defaultSystemPrompt, userContext, systemContext }
}

From a technical standpoint (the author knows very little TypeScript, so go easy on me):

  • Performance: Promise.all runs the fetches in parallel, avoiding serial waits (e.g., generating the prompt and fetching the user context can happen simultaneously).

  • Short-circuit logic: the customSystemPrompt check enables on-demand loading. If the user supplied a custom prompt, no resources are wasted generating the default one.

  • Separation of concerns: defaultSystemPrompt (framework-generated) and customSystemPrompt (user-supplied) are kept logically separate, which makes priority handling easier when they are concatenated later.

This is instructive: if we bring our own customSystemPrompt built up in practice, defaultSystemPrompt and systemContext simply return [] and {}. Skipping generation this way also avoids polluting the environment — the lighter-weight approach.

Next, splitSysPromptPrefix slices the prompt into cache blocks along the boundary marker:

export function splitSysPromptPrefix(
  systemPrompt: SystemPrompt,
  options?: { skipGlobalCacheForSystemPrompt?: boolean },
): SystemPromptBlock[] {
  const useGlobalCacheFeature = shouldUseGlobalCacheScope()
  if (useGlobalCacheFeature && options?.skipGlobalCacheForSystemPrompt) {
    logEvent('tengu_sysprompt_using_tool_based_cache', {
      promptBlockCount: systemPrompt.length,
    })
    // Filter out boundary marker, return blocks without global scope
    let attributionHeader: string | undefined
    let systemPromptPrefix: string | undefined
    const rest: string[] = []
    for (const prompt of systemPrompt) {
      if (!prompt) continue
      if (prompt === SYSTEM_PROMPT_DYNAMIC_BOUNDARY) continue // Skip boundary
      if (prompt.startsWith('x-anthropic-billing-header')) {
        attributionHeader = prompt
      } else if (CLI_SYSPROMPT_PREFIXES.has(prompt)) {
        systemPromptPrefix = prompt
      } else {
        rest.push(prompt)
      }
    }
    const result: SystemPromptBlock[] = []
    if (attributionHeader) {
      result.push({ text: attributionHeader, cacheScope: null })
    }
    if (systemPromptPrefix) {
      result.push({ text: systemPromptPrefix, cacheScope: 'org' })
    }
    const restJoined = rest.join('\n\n')
    if (restJoined) {
      result.push({ text: restJoined, cacheScope: 'org' })
    }
    return result
  }
  if (useGlobalCacheFeature) {
    const boundaryIndex = systemPrompt.findIndex(
      s => s === SYSTEM_PROMPT_DYNAMIC_BOUNDARY,
    )
    if (boundaryIndex !== -1) {
      let attributionHeader: string | undefined
      let systemPromptPrefix: string | undefined
      const staticBlocks: string[] = []
      const dynamicBlocks: string[] = []
      for (let i = 0; i < systemPrompt.length; i++) {
        const block = systemPrompt[i]
        if (!block || block === SYSTEM_PROMPT_DYNAMIC_BOUNDARY) continue
        if (block.startsWith('x-anthropic-billing-header')) {
          attributionHeader = block
        } else if (CLI_SYSPROMPT_PREFIXES.has(block)) {
          systemPromptPrefix = block
        } else if (i < boundaryIndex) {
          staticBlocks.push(block)
        } else {
          dynamicBlocks.push(block)
        }
      }
      const result: SystemPromptBlock[] = []
      if (attributionHeader)
        result.push({ text: attributionHeader, cacheScope: null })
      if (systemPromptPrefix)
        result.push({ text: systemPromptPrefix, cacheScope: null })
      const staticJoined = staticBlocks.join('\n\n')
      if (staticJoined)
        result.push({ text: staticJoined, cacheScope: 'global' })
      const dynamicJoined = dynamicBlocks.join('\n\n')
      if (dynamicJoined) result.push({ text: dynamicJoined, cacheScope: null })
      logEvent('tengu_sysprompt_boundary_found', {
        blockCount: result.length,
        staticBlockLength: staticJoined.length,
        dynamicBlockLength: dynamicJoined.length,
      })
      return result
    } else {
      logEvent('tengu_sysprompt_missing_boundary_marker', {
        promptBlockCount: systemPrompt.length,
      })
    }
  }
  let attributionHeader: string | undefined
  let systemPromptPrefix: string | undefined
  const rest: string[] = []
  for (const block of systemPrompt) {
    if (!block) continue
    if (block.startsWith('x-anthropic-billing-header')) {
      attributionHeader = block
    } else if (CLI_SYSPROMPT_PREFIXES.has(block)) {
      systemPromptPrefix = block
    } else {
      rest.push(block)
    }
  }
  const result: SystemPromptBlock[] = []
  if (attributionHeader)
    result.push({ text: attributionHeader, cacheScope: null })
  if (systemPromptPrefix)
    result.push({ text: systemPromptPrefix, cacheScope: 'org' })
  const restJoined = rest.join('\n\n')
  if (restJoined) result.push({ text: restJoined, cacheScope: 'org' })
  return result
}

The key piece is:

type CacheScope = 'global' | 'org' | null;

This distinguishes a global cache (reusable across all users, sessions, and requests) from an org-level cache (reused within a single tenant, isolated between organizations).

The caching itself is implemented by the underlying inference stack (vLLM/SGLang, or Anthropic Context Caching) and serves to reduce repeated token transmission, cutting latency and cost.

When the global cache is enabled (and the boundary marker is present):

  • Content before the boundary marker is treated as a "static template" that can be safely cached globally
  • Content after the boundary marker is treated as "dynamic context" that changes with each request and is not cached

Finally, the fallback mode, used when:

  • useGlobalCacheFeature === false, or
  • useGlobalCacheFeature === true but boundaryIndex === -1 (marker not found)

Its handling: iterate over every block and classify — billing header -> null; CLI prefix -> 'org'; everything else -> merged into one block, 'org'.

Instrumentation aside, the actual low-level caching is handled by Anthropic Context Caching (or vLLM).

To recap:

system prefix = system prompt + system context + user context

system prompt = static layer + dynamic layer (split by SYSTEM_PROMPT_DYNAMIC_BOUNDARY), which is precisely what makes the prompt-cache handling above straightforward
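To see why the layering pays off, here is a hedged sketch of how cacheScope could map onto provider cache breakpoints at request-assembly time. The ApiSystemBlock shape and the ephemeral cache_control marker follow Anthropic's prompt-caching API; the mapping function itself is hypothetical, not Claude Code's actual code:

```typescript
// Hypothetical mapping of SystemPromptBlock[] onto API system blocks.
// Blocks with a non-null cacheScope get a cache breakpoint; null-scope
// (dynamic) blocks are sent uncached every time.
type CacheScope = 'global' | 'org' | null;
interface SystemPromptBlock { text: string; cacheScope: CacheScope }
interface ApiSystemBlock {
  type: 'text';
  text: string;
  cache_control?: { type: 'ephemeral' };
}

function toApiSystemBlocks(blocks: SystemPromptBlock[]): ApiSystemBlock[] {
  return blocks.map(b =>
    b.cacheScope !== null
      ? { type: 'text', text: b.text, cache_control: { type: 'ephemeral' } }
      : { type: 'text', text: b.text },
  );
}
```

Since caching is prefix-based, putting the static layer first and the dynamic layer last means a per-request change in the dynamic tail never invalidates the cached static head.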

The user context, by contrast, is fairly trivial: it is the CLAUDE.md / rules-file system. The loading order is what you would expect — the closer a file sits to the working directory, the higher its priority. But CLAUDE.md is only the surface; underneath there is also dynamically injected memory / attachments. Nested memory matched by file path goes through src/utils/attachments.ts: memoryFilesToAttachments (synchronous) and getNestedMemoryAttachmentsForFile (asynchronous).

  • The former converts memory files into LLM attachments, deduplicating and maintaining an LRU cache
  • The latter walks the directory tree in priority order, collects every memory rule applicable to the target file, and hands them to the former

Together they implement a dynamic, layered, deduplicated context-injection mechanism: every LLM request gets the most relevant project rules while token consumption and cache efficiency stay under control.
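The priority-ordered directory walk can be sketched as pure path logic (nestedMemoryPaths is a hypothetical helper; the real code also does filesystem checks, deduplication, and LRU caching):

```typescript
// Walk from a target file's directory up to the project root, collecting
// candidate CLAUDE.md paths. Entries closer to the file come first, i.e.
// highest priority. Pure-path sketch — no fs access, no dedup.
function nestedMemoryPaths(targetFile: string, projectRoot: string): string[] {
  const paths: string[] = [];
  let dir = targetFile.slice(0, targetFile.lastIndexOf('/'));
  while (dir.startsWith(projectRoot)) {
    paths.push(`${dir}/CLAUDE.md`);
    if (dir === projectRoot) break;
    dir = dir.slice(0, dir.lastIndexOf('/')); // step one level up
  }
  return paths;
}
```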

Runtime

At runtime, src/Tool.ts provides the bus (ToolUseContext). It is not merely a tool container but a carrier for messages, readFileState, appState, the permission context, contentReplacementState, and renderedSystemPrompt. At runtime, Claude Code's context engineering essentially revolves around it:

export type ToolUseContext = {
  options: {
    commands: Command[]
    debug: boolean
    mainLoopModel: string
    tools: Tools
    verbose: boolean
    thinkingConfig: ThinkingConfig
    mcpClients: MCPServerConnection[]
    mcpResources: Record<string, ServerResource[]>
    isNonInteractiveSession: boolean
    agentDefinitions: AgentDefinitionsResult
    maxBudgetUsd?: number
    /** Custom system prompt that replaces the default system prompt */
    customSystemPrompt?: string
    /** Additional system prompt appended after the main system prompt */
    appendSystemPrompt?: string
    /** Override querySource for analytics tracking */
    querySource?: QuerySource
    /** Optional callback to get the latest tools (e.g., after MCP servers connect mid-query) */
    refreshTools?: () => Tools
  }
  abortController: AbortController
  readFileState: FileStateCache
  getAppState(): AppState
  setAppState(f: (prev: AppState) => AppState): void
  /**
   * Always-shared setAppState for session-scoped infrastructure (background
   * tasks, session hooks). Unlike setAppState, which is no-op for async agents
   * (see createSubagentContext), this always reaches the root store so agents
   * at any nesting depth can register/clean up infrastructure that outlives
   * a single turn. Only set by createSubagentContext; main-thread contexts
   * fall back to setAppState.
   */
  setAppStateForTasks?: (f: (prev: AppState) => AppState) => void
  /**
   * Optional handler for URL elicitations triggered by tool call errors (-32042).
   * In print/SDK mode, this delegates to structuredIO.handleElicitation.
   * In REPL mode, this is undefined and the queue-based UI path is used.
   */
  handleElicitation?: (
    serverName: string,
    params: ElicitRequestURLParams,
    signal: AbortSignal,
  ) => Promise<ElicitResult>
  setToolJSX?: SetToolJSXFn
  addNotification?: (notif: Notification) => void
  /** Append a UI-only system message to the REPL message list. Stripped at the
   * normalizeMessagesForAPI boundary — the Exclude<> makes that type-enforced. */
  appendSystemMessage?: (
    msg: Exclude<SystemMessage, SystemLocalCommandMessage>,
  ) => void
  /** Send an OS-level notification (iTerm2, Kitty, Ghostty, bell, etc.) */
  sendOSNotification?: (opts: {
    message: string
    notificationType: string
  }) => void
  nestedMemoryAttachmentTriggers?: Set<string>
  /**
   * CLAUDE.md paths already injected as nested_memory attachments this
   * session. Dedup for memoryFilesToAttachments — readFileState is an LRU
   * that evicts entries in busy sessions, so its .has() check alone can
   * re-inject the same CLAUDE.md dozens of times.
   */
  loadedNestedMemoryPaths?: Set<string>
  dynamicSkillDirTriggers?: Set<string>
  /** Skill names surfaced via skill_discovery this session. Telemetry only (feeds was_discovered). */
  discoveredSkillNames?: Set<string>
  userModified?: boolean
  setInProgressToolUseIDs: (f: (prev: Set<string>) => Set<string>) => void
  /** Only wired in interactive (REPL) contexts; SDK/QueryEngine don't set this. */
  setHasInterruptibleToolInProgress?: (v: boolean) => void
  setResponseLength: (f: (prev: number) => number) => void
  /** Ant-only: push a new API metrics entry for OTPS tracking.
   * Called by subagent streaming when a new API request starts. */
  pushApiMetricsEntry?: (ttftMs: number) => void
  setStreamMode?: (mode: SpinnerMode) => void
  onCompactProgress?: (event: CompactProgressEvent) => void
  setSDKStatus?: (status: SDKStatus) => void
  openMessageSelector?: () => void
  updateFileHistoryState: (
    updater: (prev: FileHistoryState) => FileHistoryState,
  ) => void
  updateAttributionState: (
    updater: (prev: AttributionState) => AttributionState,
  ) => void
  setConversationId?: (id: UUID) => void
  agentId?: AgentId // Only set for subagents; use getSessionId() for session ID. Hooks use this to distinguish subagent calls.
  agentType?: string // Subagent type name. For the main thread's --agent type, hooks fall back to getMainThreadAgentType().
  /** When true, canUseTool must always be called even when hooks auto-approve.
   * Used by speculation for overlay file path rewriting. */
  requireCanUseTool?: boolean
  messages: Message[]
  fileReadingLimits?: {
    maxTokens?: number
    maxSizeBytes?: number
  }
  globLimits?: {
    maxResults?: number
  }
  toolDecisions?: Map<
    string,
    {
      source: string
      decision: 'accept' | 'reject'
      timestamp: number
    }
  >
  queryTracking?: QueryChainTracking
  /** Callback factory for requesting interactive prompts from the user.
   * Returns a prompt callback bound to the given source name.
   * Only available in interactive (REPL) contexts. */
  requestPrompt?: (
    sourceName: string,
    toolInputSummary?: string | null,
  ) => (request: PromptRequest) => Promise<PromptResponse>
  toolUseId?: string
  criticalSystemReminder_EXPERIMENTAL?: string
  /** When true, preserve toolUseResult on messages even for subagents.
   * Used by in-process teammates whose transcripts are viewable by the user. */
  preserveToolUseResults?: boolean
  /** Local denial tracking state for async subagents whose setAppState is a
   * no-op. Without this, the denial counter never accumulates and the
   * fallback-to-prompting threshold is never reached. Mutable — the
   * permissions code updates it in place. */
  localDenialTracking?: DenialTrackingState
  /**
   * Per-conversation-thread content replacement state for the tool result
   * budget. When present, query.ts applies the aggregate tool result budget.
   * Main thread: REPL provisions once (never resets — stale UUID keys
   * are inert). Subagents: createSubagentContext clones the parent's state
   * by default (cache-sharing forks need identical decisions), or
   * resumeAgentBackground threads one reconstructed from sidechain records.
   */
  contentReplacementState?: ContentReplacementState
  /**
   * Parent's rendered system prompt bytes, frozen at turn start.
   * Used by fork subagents to share the parent's prompt cache — re-calling
   * getSystemPrompt() at fork-spawn time can diverge (GrowthBook cold→warm)
   * and bust the cache. See forkSubagent.ts.
   */
  renderedSystemPrompt?: SystemPrompt
}

From here, the runtime context splits into two paths:

  • the compact path taken during runtime
  • the /resume path that rebuilds logical state (session memory and the transcript)

compact

Four layers of compaction:

Microcompact

Rule-driven: filter by an allowlist, keep the most recent N results, and clear older ones.

The allowlist: file Read/Edit…, Bash, Grep, Glob, WebSearch, WebFetch. These tools' outputs usually don't stay relevant for long, so they are safe to filter out.

The catch: filtering must not disturb the prefix cache too much.

Hence two paths:

Cache Microcompact (fine-grained)

The user is in a continuous conversation and the server-side cache is still warm.

Old results are deleted via the Cache Editing API (without touching the prompt prefix), with global state tracking maintained.

Time-based Microcompact (coarse-grained)

The user returns after being away for a while and the server-side cache has expired.

Message content is modified directly — old outputs are simply replaced with placeholders — with no extra state to maintain.

Detail: Cache Microcompact applies only to the main thread. Packet captures once showed subagents running background tasks such as session-memory extraction and prompt suggestions; if those subagents registered their tool results into the global state, the main thread's next deletion attempt would try to remove tool results that simply don't exist in its own conversation history.
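The coarse-grained path is simple enough to sketch directly (hypothetical message shape and placeholder text — the real code also has to respect the prefix cache and tool pairing):

```typescript
// Time-based microcompact sketch: rewrite old allowlisted tool results to a
// placeholder, keeping the most recent N intact. Hypothetical message shape.
interface ToolResultMsg { toolName: string; content: string }

const MICROCOMPACT_ALLOWLIST = new Set([
  'Read', 'Edit', 'Bash', 'Grep', 'Glob', 'WebSearch', 'WebFetch',
]);

function timeBasedMicrocompact(
  msgs: ToolResultMsg[],
  keepRecent: number,
): ToolResultMsg[] {
  const cutoff = msgs.length - keepRecent; // everything before this is "old"
  return msgs.map((m, i) =>
    i < cutoff && MICROCOMPACT_ALLOWLIST.has(m.toolName)
      ? { ...m, content: '[old tool result elided to save context]' }
      : m,
  );
}
```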

Session-memory compaction

Extract structured facts (no conversation summarization, no encoding-only tricks), organized by project structure / user preferences / task progress, and persist them to MEMORY.md.

First wait for the background memory extraction to finish, then read the contents of MEMORY.md.

Key design question: how many recent messages to keep (a minimum token count (10k), a minimum of 5 messages, and a maximum token count).

Details:

  • tool_use and tool_result must not be separated — keep both or drop both
  • During streaming, the same message id may be split across multiple messages (thinking in one, tool_use in another); they must all be kept together

The advantage of session-memory compaction: no model-written summary is needed — structured memory plus the most recent raw messages.
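The pairing constraint from the details above can be sketched as a boundary adjustment (hypothetical simplified message shape; the real code also groups messages that streaming split apart):

```typescript
// Choose how many trailing messages to keep, then pull the boundary back so
// a tool_result is never kept without the assistant tool_use that caused it.
interface Msg {
  role: 'assistant' | 'user';
  hasToolUse?: boolean;
  isToolResult?: boolean;
}

function safeKeepStart(msgs: Msg[], desiredKeep: number): number {
  let start = Math.max(0, msgs.length - desiredKeep);
  // If the first kept message is a tool_result, extend backward until we
  // include its producing tool_use — keep both or drop both.
  while (start > 0 && msgs[start].isToolResult) start--;
  return start;
}
```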

Full compaction

When session-memory compaction is no longer enough, full compaction is needed. This layer does call the model (the prompt in the source is worth studying on its own).

The analysis mechanism:

compaction prompt -> detailed analysis -> final summary -> the analysis is stripped during formatting

The goal: make the model think things through explicitly before it emits the summary — essentially chain of thought.

A practical problem: the compaction request itself can trigger "prompt too long".

The source's solution: group messages by API turn, drop from the earliest group, and retry at most three times.
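That drop-and-retry strategy can be sketched as follows (synchronous for brevity; summarize() is a hypothetical stand-in for the model call, and the error matching is illustrative):

```typescript
// Group-by-API-turn retry sketch: on "prompt too long", drop the earliest
// turn group and try again, at most 3 attempts.
function compactWithRetry<T>(
  turnGroups: T[][],
  summarize: (msgs: T[]) => string,
): string {
  let groups = turnGroups;
  for (let attempt = 0; attempt < 3; attempt++) {
    try {
      return summarize(groups.flat());
    } catch (err) {
      if (!(err instanceof Error) || !err.message.includes('prompt too long')) {
        throw err; // only retry on the oversize error
      }
      groups = groups.slice(1); // drop the earliest turn group
    }
  }
  throw new Error('compaction failed after 3 attempts');
}
```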

After compaction completes:

clear the file-read cache -> re-inject recent files (a 50k-token budget) -> re-inject the plan file -> re-inject skill content -> re-inject MCP tool descriptions -> re-inject the agent list

Auto-trigger

After every model call -> check token usage (if it exceeds the threshold) -> session-memory compaction -> full compaction -> circuit breaker: stop after 3 consecutive failures.

The session_memory subagent and the compact subagent never trigger autocompact themselves; they are forked subagents to begin with, and if they could also autocompact, the result would be a deadlock.
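The trigger-plus-circuit-breaker logic can be sketched like this (hypothetical threshold and return values; the real code escalates through both compaction layers):

```typescript
// Autocompact decision sketch: threshold check after each model call,
// with a 3-consecutive-failure circuit breaker that stays open once tripped.
class AutoCompactor {
  private consecutiveFailures = 0;

  constructor(
    private thresholdTokens: number,
    private compact: () => boolean, // stands in for both compaction layers
  ) {}

  afterModelCall(usedTokens: number): 'skipped' | 'compacted' | 'failed' | 'tripped' {
    if (usedTokens < this.thresholdTokens) return 'skipped';
    if (this.consecutiveFailures >= 3) return 'tripped'; // breaker open
    if (this.compact()) {
      this.consecutiveFailures = 0; // success resets the breaker
      return 'compacted';
    }
    this.consecutiveFailures++;
    return this.consecutiveFailures >= 3 ? 'tripped' : 'failed';
  }
}
```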

Tool memory

Core tools are loaded at startup; extended tools are discovered and loaded on demand via toolSearch. Each tool defines its input parameters with a Zod schema, and the JSON the model produces must pass validation before the tool executes.

If a tool's output is too large, the system spills it to external tool-result storage and hands the model a summary plus a pointer, to be fetched on demand.

session memory

In src/services/SessionMemory/sessionMemory.ts, session_memory is triggered by a post-sampling hook and uses a forked subagent to update a markdown file; the trigger threshold is controlled jointly by token growth and tool-call count.

/resume

Transcript: written by recordTranscript() in src/utils/sessionStorage.ts

export async function recordTranscript(
  messages: Message[],
  teamInfo?: TeamInfo,
  startingParentUuidHint?: UUID,
  allMessages?: readonly Message[],
): Promise<UUID | null> {
  const cleanedMessages = cleanMessagesForLogging(messages, allMessages)
  const sessionId = getSessionId() as UUID
  const messageSet = await getSessionMessages(sessionId)
  const newMessages: typeof cleanedMessages = []
  let startingParentUuid: UUID | undefined = startingParentUuidHint
  let seenNewMessage = false
  for (const m of cleanedMessages) {
    if (messageSet.has(m.uuid as UUID)) {
      // Only track skipped messages that form a prefix. After compaction,
      // messagesToKeep appear AFTER new CB/summary, so this skips them.
      if (!seenNewMessage && isChainParticipant(m)) {
        startingParentUuid = m.uuid as UUID
      }
    } else {
      newMessages.push(m)
      seenNewMessage = true
    }
  }
  if (newMessages.length > 0) {
    await getProject().insertMessageChain(
      newMessages,
      false,
      undefined,
      startingParentUuid,
      teamInfo,
    )
  }
  // Return the last ACTUALLY recorded chain-participant's UUID, OR the
  // prefix-tracked UUID if no new chain participants were recorded. This lets
  // callers (useLogMessages) maintain the correct parent chain even when the
  // slice is all-recorded (rewind, /resume scenarios where every message is
  // already in messageSet). Progress is skipped — it's written to the JSONL
  // but nothing chains TO it (see isChainParticipant).
  const lastRecorded = newMessages.findLast(isChainParticipant)
  return (lastRecorded?.uuid as UUID | undefined) ?? startingParentUuid ?? null
}

After preprocessing strips sensitive information, the messages are traversed to separate already-recorded messages from new ones, dynamically updating the parent uuid so that the conversation chain keeps correct parent-child relationships even through compaction, replay, and concurrency. Callers then only maintain a single parentUuid value, without worrying about the internal complexity.

The core structure: JSONL + a parentUuid chain. A compact boundary deliberately breaks the chain so that resume re-anchors on the summary.
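The chain walk at the heart of this can be sketched in a few lines (hypothetical simplified message shape; the cycle guard mirrors the one the real loader uses):

```typescript
// Rebuild the active conversation by walking parentUuid links backward from
// a leaf. A compact boundary (parentUuid === null on the summary) naturally
// terminates the walk, so resume starts from the summary.
interface TranscriptMsg { uuid: string; parentUuid: string | null; text: string }

function buildChain(
  byUuid: Map<string, TranscriptMsg>,
  leafUuid: string,
): TranscriptMsg[] {
  const chain: TranscriptMsg[] = [];
  const seen = new Set<string>(); // guard against corrupted parent cycles
  let cur = byUuid.get(leafUuid);
  while (cur && !seen.has(cur.uuid)) {
    seen.add(cur.uuid);
    chain.unshift(cur); // oldest-first order
    cur = cur.parentUuid ? byUuid.get(cur.parentUuid) : undefined;
  }
  return chain;
}
```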

Resume rebuilds logical state (it does not restore a process): loadTranscriptFile() in src/utils/sessionStorage reads the transcript, bridges legacy progress entries, restores content replacement, and handles the preserved segment:

export async function loadTranscriptFile(
  filePath: string,
  opts?: { keepAllLeaves?: boolean },
): Promise<{
  messages: Map<UUID, TranscriptMessage>
  summaries: Map<UUID, string>
  customTitles: Map<UUID, string>
  tags: Map<UUID, string>
  agentNames: Map<UUID, string>
  agentColors: Map<UUID, string>
  agentSettings: Map<UUID, string>
  prNumbers: Map<UUID, number>
  prUrls: Map<UUID, string>
  prRepositories: Map<UUID, string>
  modes: Map<UUID, string>
  worktreeStates: Map<UUID, PersistedWorktreeSession | null>
  fileHistorySnapshots: Map<UUID, FileHistorySnapshotMessage>
  attributionSnapshots: Map<UUID, AttributionSnapshotMessage>
  contentReplacements: Map<UUID, ContentReplacementRecord[]>
  agentContentReplacements: Map<AgentId, ContentReplacementRecord[]>
  contextCollapseCommits: ContextCollapseCommitEntry[]
  contextCollapseSnapshot: ContextCollapseSnapshotEntry | undefined
  leafUuids: Set<UUID>
}> {
  const messages = new Map<UUID, TranscriptMessage>()
  const summaries = new Map<UUID, string>()
  const customTitles = new Map<UUID, string>()
  const tags = new Map<UUID, string>()
  const agentNames = new Map<UUID, string>()
  const agentColors = new Map<UUID, string>()
  const agentSettings = new Map<UUID, string>()
  const prNumbers = new Map<UUID, number>()
  const prUrls = new Map<UUID, string>()
  const prRepositories = new Map<UUID, string>()
  const modes = new Map<UUID, string>()
  const worktreeStates = new Map<UUID, PersistedWorktreeSession | null>()
  const fileHistorySnapshots = new Map<UUID, FileHistorySnapshotMessage>()
  const attributionSnapshots = new Map<UUID, AttributionSnapshotMessage>()
  const contentReplacements = new Map<UUID, ContentReplacementRecord[]>()
  const agentContentReplacements = new Map<
    AgentId,
    ContentReplacementRecord[]
  >()
  // Array, not Map — commit order matters (nested collapses).
  const contextCollapseCommits: ContextCollapseCommitEntry[] = []
  // Last-wins — later entries supersede.
  let contextCollapseSnapshot: ContextCollapseSnapshotEntry | undefined
  try {
    // For large transcripts, avoid materializing megabytes of stale content.
    // Single forward chunked read: attribution-snapshot lines are skipped at
    // the fd level (never buffered), compact boundaries truncate the
    // accumulator in-stream. Peak allocation is the OUTPUT size, not the
    // file size — a 151 MB session that is 84% stale attr-snaps allocates
    // ~32 MB instead of 159+64 MB. This matters because mimalloc does not
    // return those pages to the OS even after JS-level GC frees the backing
    // buffers (measured: arrayBuffers=0 after Bun.gc(true) but RSS stuck at
    // ~316 MB on the old scan+strip path vs ~155 MB here).
    //
    // Pre-boundary metadata (agent-setting, mode, pr-link, etc.) is recovered
    // via a cheap byte-level forward scan of [0, boundary).
    let buf: Buffer | null = null
    let metadataLines: string[] | null = null
    let hasPreservedSegment = false
    if (!isEnvTruthy(process.env.CLAUDE_CODE_DISABLE_PRECOMPACT_SKIP)) {
      const { size } = await stat(filePath)
      if (size > SKIP_PRECOMPACT_THRESHOLD) {
        const scan = await readTranscriptForLoad(filePath, size)
        buf = scan.postBoundaryBuf
        hasPreservedSegment = scan.hasPreservedSegment
        // >0 means we truncated pre-boundary bytes and must recover
        // session-scoped metadata from that range. A preservedSegment
        // boundary does not truncate (preserved messages are physically
        // pre-boundary), so offset stays 0 unless an EARLIER non-preserved
        // boundary already truncated — in which case the preserved messages
        // for the later boundary are post-that-earlier-boundary and were
        // kept, and we still want the metadata scan.
        if (scan.boundaryStartOffset > 0) {
          metadataLines = await scanPreBoundaryMetadata(
            filePath,
            scan.boundaryStartOffset,
          )
        }
      }
    }
    buf ??= await readFile(filePath)
    // For large buffers (which here means readTranscriptForLoad output with
    // attr-snaps already stripped at the fd level — the <5MB readFile path
    // falls through the size gate below), the dominant cost is parsing dead
    // fork branches that buildConversationChain would discard anyway. Skip
    // when the caller needs all
    // leaves (loadAllLogsFromSessionFile for /insights picks the branch with
    // most user messages, not the latest), when the boundary has a
    // preservedSegment (those messages keep their pre-compact parentUuid on
    // disk -- applyPreservedSegmentRelinks splices them in-memory AFTER
    // parse, so a pre-parse chain walk would drop them as orphans), and when
    // CLAUDE_CODE_DISABLE_PRECOMPACT_SKIP is set (that kill switch means
    // "load everything, skip nothing"; this is another skip-before-parse
    // optimization and the scan it depends on for hasPreservedSegment did
    // not run).
    if (
      !opts?.keepAllLeaves &&
      !hasPreservedSegment &&
      !isEnvTruthy(process.env.CLAUDE_CODE_DISABLE_PRECOMPACT_SKIP) &&
      buf.length > SKIP_PRECOMPACT_THRESHOLD
    ) {
      buf = walkChainBeforeParse(buf)
    }
    // First pass: process metadata-only lines collected during the boundary scan.
    // These populate the session-scoped maps (agentSettings, modes, prNumbers,
    // etc.) for entries written before the compact boundary. Any overlap with
    // the post-boundary buffer is harmless — later values overwrite earlier ones.
    if (metadataLines && metadataLines.length > 0) {
      const metaEntries = parseJSONL<Entry>(
        Buffer.from(metadataLines.join('\n')),
      )
      for (const entry of metaEntries) {
        if (entry.type === 'summary' && entry.leafUuid) {
          summaries.set(entry.leafUuid, entry.summary)
        } else if (entry.type === 'custom-title' && entry.sessionId) {
          customTitles.set(entry.sessionId, entry.customTitle)
        } else if (entry.type === 'tag' && entry.sessionId) {
          tags.set(entry.sessionId, entry.tag)
        } else if (entry.type === 'agent-name' && entry.sessionId) {
          agentNames.set(entry.sessionId, entry.agentName)
        } else if (entry.type === 'agent-color' && entry.sessionId) {
          agentColors.set(entry.sessionId, entry.agentColor)
        } else if (entry.type === 'agent-setting' && entry.sessionId) {
          agentSettings.set(entry.sessionId, entry.agentSetting)
        } else if (entry.type === 'mode' && entry.sessionId) {
          modes.set(entry.sessionId, entry.mode)
        } else if (entry.type === 'worktree-state' && entry.sessionId) {
          worktreeStates.set(entry.sessionId, entry.worktreeSession)
        } else if (entry.type === 'pr-link' && entry.sessionId) {
          prNumbers.set(entry.sessionId, entry.prNumber)
          prUrls.set(entry.sessionId, entry.prUrl)
          prRepositories.set(entry.sessionId, entry.prRepository)
        }
      }
    }
    const entries = parseJSONL<Entry>(buf)
    // Bridge map for legacy progress entries: progress_uuid → progress_parent_uuid.
    // PR #24099 removed progress from isTranscriptMessage, so old transcripts with
    // progress in the parentUuid chain would truncate at buildConversationChain
    // when messages.get(progressUuid) returns undefined. Since transcripts are
    // append-only (parents before children), we record each progress→parent link
    // as we see it, chain-resolving through consecutive progress entries, then
    // rewrite any subsequent message whose parentUuid lands in the bridge.
    const progressBridge = new Map<UUID, UUID | null>()
    for (const entry of entries) {
      // Legacy progress check runs before the Entry-typed else-if chain —
      // progress is not in the Entry union, so checking it after TypeScript
      // has narrowed `entry` intersects to `never`.
      if (isLegacyProgressEntry(entry)) {
        // Chain-resolve through consecutive progress entries so a later
        // message pointing at the tail of a progress run bridges to the
        // nearest non-progress ancestor in one lookup.
        const parent = entry.parentUuid
        progressBridge.set(
          entry.uuid,
          parent && progressBridge.has(parent)
            ? (progressBridge.get(parent) ?? null)
            : parent,
        )
        continue
      }
      if (isTranscriptMessage(entry)) {
        if (entry.parentUuid && progressBridge.has(entry.parentUuid)) {
          entry.parentUuid = progressBridge.get(entry.parentUuid) ?? null
        }
        messages.set(entry.uuid, entry)
        // Compact boundary: prior marble-origami-commit entries reference
        // messages that won't be in the post-boundary chain. The >5MB
        // backward-scan path discards them naturally by never reading the
        // pre-boundary bytes; the <5MB path reads everything, so discard
        // here. Without this, getStats().collapsedSpans in /context
        // overcounts (projectView silently skips the stale commits but
        // they're still in the log).
        if (isCompactBoundaryMessage(entry)) {
          contextCollapseCommits.length = 0
          contextCollapseSnapshot = undefined
        }
      } else if (entry.type === 'summary' && entry.leafUuid) {
        summaries.set(entry.leafUuid, entry.summary)
      } else if (entry.type === 'custom-title' && entry.sessionId) {
        customTitles.set(entry.sessionId, entry.customTitle)
      } else if (entry.type === 'tag' && entry.sessionId) {
        tags.set(entry.sessionId, entry.tag)
      } else if (entry.type === 'agent-name' && entry.sessionId) {
        agentNames.set(entry.sessionId, entry.agentName)
      } else if (entry.type === 'agent-color' && entry.sessionId) {
        agentColors.set(entry.sessionId, entry.agentColor)
      } else if (entry.type === 'agent-setting' && entry.sessionId) {
        agentSettings.set(entry.sessionId, entry.agentSetting)
      } else if (entry.type === 'mode' && entry.sessionId) {
        modes.set(entry.sessionId, entry.mode)
      } else if (entry.type === 'worktree-state' && entry.sessionId) {
        worktreeStates.set(entry.sessionId, entry.worktreeSession)
      } else if (entry.type === 'pr-link' && entry.sessionId) {
        prNumbers.set(entry.sessionId, entry.prNumber)
        prUrls.set(entry.sessionId, entry.prUrl)
        prRepositories.set(entry.sessionId, entry.prRepository)
      } else if (entry.type === 'file-history-snapshot') {
        fileHistorySnapshots.set(entry.messageId, entry)
      } else if (entry.type === 'attribution-snapshot') {
        attributionSnapshots.set(entry.messageId, entry)
      } else if (entry.type === 'content-replacement') {
        // Subagent decisions key by agentId (sidechain resume); main-thread
        // decisions key by sessionId (/resume).
        if (entry.agentId) {
          const existing = agentContentReplacements.get(entry.agentId) ?? []
          agentContentReplacements.set(entry.agentId, existing)
          existing.push(...entry.replacements)
        } else {
          const existing = contentReplacements.get(entry.sessionId) ?? []
          contentReplacements.set(entry.sessionId, existing)
          existing.push(...entry.replacements)
        }
      } else if (entry.type === 'marble-origami-commit') {
        contextCollapseCommits.push(entry)
      } else if (entry.type === 'marble-origami-snapshot') {
        contextCollapseSnapshot = entry
      }
    }
  } catch {
    // File doesn't exist or can't be read
  }
  applyPreservedSegmentRelinks(messages)
  applySnipRemovals(messages)
  // Compute leaf UUIDs once at load time
  // Only user/assistant messages should be considered as leaves for anchoring resume.
  // Other message types (system, attachment) are metadata or auxiliary and shouldn't
  // anchor a conversation chain.
  //
  // We use standard parent relationship for main chain detection, but also need to
  // handle cases where the last message is a system/metadata message.
  // For each conversation chain (identified by following parent links), the leaf
  // is the most recent user/assistant message.
  const allMessages = [...messages.values()]
  // Standard leaf computation using parent relationships
  const parentUuids = new Set(
    allMessages
      .map(msg => msg.parentUuid)
      .filter((uuid): uuid is UUID => uuid !== null),
  )
  // Find all terminal messages (messages with no children)
  const terminalMessages = allMessages.filter(msg => !parentUuids.has(msg.uuid))
  const leafUuids = new Set<UUID>()
  let hasCycle = false
  if (getFeatureValue_CACHED_MAY_BE_STALE('tengu_pebble_leaf_prune', false)) {
    // Build a set of UUIDs that have user/assistant children
    // (these are mid-conversation nodes, not dead ends)
    const hasUserAssistantChild = new Set<UUID>()
    for (const msg of allMessages) {
      if (msg.parentUuid && (msg.type === 'user' || msg.type === 'assistant')) {
        hasUserAssistantChild.add(msg.parentUuid)
      }
    }
    // For each terminal message, walk back to find the nearest user/assistant ancestor.
    // Skip ancestors that already have user/assistant children - those are mid-conversation
    // nodes where the conversation continued (e.g., an assistant tool_use message whose
    // progress child is terminal, but whose tool_result child continues the conversation).
    for (const terminal of terminalMessages) {
      const seen = new Set<UUID>()
      let current: TranscriptMessage | undefined = terminal
      while (current) {
        if (seen.has(current.uuid)) {
          hasCycle = true
          break
        }
        seen.add(current.uuid)
        if (current.type === 'user' || current.type === 'assistant') {
          if (!hasUserAssistantChild.has(current.uuid)) {
            leafUuids.add(current.uuid)
          }
          break
        }
        current = current.parentUuid
          ? messages.get(current.parentUuid)
          : undefined
      }
    }
  } else {
    // Original leaf computation: walk back from terminal messages to find
    // the nearest user/assistant ancestor unconditionally
    for (const terminal of terminalMessages) {
      const seen = new Set<UUID>()
      let current: TranscriptMessage | undefined = terminal
      while (current) {
        if (seen.has(current.uuid)) {
          hasCycle = true
          break
        }
        seen.add(current.uuid)
        if (current.type === 'user' || current.type === 'assistant') {
          leafUuids.add(current.uuid)
          break
        }
        current = current.parentUuid
          ? messages.get(current.parentUuid)
          : undefined
      }
    }
  }
  if (hasCycle) {
    logEvent('tengu_transcript_parent_cycle', {})
  }
  return {
    messages,
    summaries,
    customTitles,
    tags,
    agentNames,
    agentColors,
    agentSettings,
    prNumbers,
    prUrls,
    prRepositories,
    modes,
    worktreeStates,
    fileHistorySnapshots,
    attributionSnapshots,
    contentReplacements,
    agentContentReplacements,
    contextCollapseCommits,
    contextCollapseSnapshot,
    leafUuids,
  }
}

buildConversationChain() in the same file walks parentUuid links to restore the conversation, including tool-result branches (notably, Claude Code applies fine-grained control to tool_use, tool_result, and thinking). On top of that, src/utils/conversationRecovery.ts filters out unresolved tool uses, orphaned thinking, and whitespace-only assistant messages, and inserts a continuation sentinel when needed.

The forked subagent mentioned earlier is a subagent designed specifically around prompt-cache hits:

export async function runForkedAgent({
  promptMessages,
  cacheSafeParams,
  canUseTool,
  querySource,
  forkLabel,
  overrides,
  maxOutputTokens,
  maxTurns,
  onMessage,
  skipTranscript,
  skipCacheWrite,
}: ForkedAgentParams): Promise<ForkedAgentResult> {
  const startTime = Date.now()
  const outputMessages: Message[] = []
  let totalUsage: NonNullableUsage = { ...EMPTY_USAGE }
  const {
    systemPrompt,
    userContext,
    systemContext,
    toolUseContext,
    forkContextMessages,
  } = cacheSafeParams
  // Create isolated context to prevent mutation of parent state
  const isolatedToolUseContext = createSubagentContext(
    toolUseContext,
    overrides,
  )
  // Do NOT filterIncompleteToolCalls here — it drops the whole assistant on
  // partial tool batches, orphaning the paired results (API 400). Dangling
  // tool_uses are repaired downstream by ensureToolResultPairing in claude.ts,
  // same as the main thread — identical post-repair prefix keeps the cache hit.
  const initialMessages: Message[] = [...forkContextMessages, ...promptMessages]
  // Generate agent ID and record initial messages for transcript
  // When skipTranscript is set, skip agent ID creation and all transcript I/O
  const agentId = skipTranscript ? undefined : createAgentId(forkLabel)
  let lastRecordedUuid: UUID | null = null
  if (agentId) {
    await recordSidechainTranscript(initialMessages, agentId).catch(err =>
      logForDebugging(
        `Forked agent [${forkLabel}] failed to record initial transcript: ${err}`,
      ),
    )
    // Track the last recorded message UUID for parent chain continuity
    lastRecordedUuid =
      initialMessages.length > 0
        ? initialMessages[initialMessages.length - 1]!.uuid
        : null
  }
  // Run the query loop with isolated context (cache-safe params preserved)
  try {
    for await (const message of query({
      messages: initialMessages,
      systemPrompt,
      userContext,
      systemContext,
      canUseTool,
      toolUseContext: isolatedToolUseContext,
      querySource,
      maxOutputTokensOverride: maxOutputTokens,
      maxTurns,
      skipCacheWrite,
    })) {
      // Extract real usage from message_delta stream events (final usage per API call)
      if (message.type === 'stream_event') {
        if (
          'event' in message &&
          message.event?.type === 'message_delta' &&
          message.event.usage
        ) {
          const turnUsage = updateUsage({ ...EMPTY_USAGE }, message.event.usage)
          totalUsage = accumulateUsage(totalUsage, turnUsage)
        }
        continue
      }
      if (message.type === 'stream_request_start') {
        continue
      }
      logForDebugging(
        `Forked agent [${forkLabel}] received message: type=${message.type}`,
      )
      outputMessages.push(message as Message)
      onMessage?.(message as Message)
      // Record transcript for recordable message types (same pattern as runAgent.ts)
      const msg = message as Message
      if (
        agentId &&
        (msg.type === 'assistant' ||
          msg.type === 'user' ||
          msg.type === 'progress')
      ) {
        await recordSidechainTranscript([msg], agentId, lastRecordedUuid).catch(
          err =>
            logForDebugging(
              `Forked agent [${forkLabel}] failed to record transcript: ${err}`,
            ),
        )
        if (msg.type !== 'progress') {
          lastRecordedUuid = msg.uuid
        }
      }
    }
  } finally {
    // Release cloned file state cache memory (same pattern as runAgent.ts)
    isolatedToolUseContext.readFileState.clear()
    // Release the cloned fork context messages
    initialMessages.length = 0
  }
  logForDebugging(
    `Forked agent [${forkLabel}] finished: ${outputMessages.length} messages, types=[${outputMessages.map(m => m.type).join(', ')}], totalUsage: input=${totalUsage.input_tokens} output=${totalUsage.output_tokens} cacheRead=${totalUsage.cache_read_input_tokens} cacheCreate=${totalUsage.cache_creation_input_tokens}`,
  )
  const durationMs = Date.now() - startTime
  // Log the fork query metrics with full NonNullableUsage
  logForkAgentQueryEvent({
    forkLabel,
    querySource,
    durationMs,
    messageCount: outputMessages.length,
    totalUsage,
    queryTracking: toolUseContext.queryTracking,
  })
  return {
    messages: outputMessages,
    totalUsage,
  }
}

The forked agent runs an independent LLM query loop, streams messages back to the parent in real time, and maintains its own transcript and usage statistics.

createSubagentContext clones toolUseContext to avoid polluting the parent agent's state (another takeaway here: in many multi-agent collaboration setups, subagents can be separated along exactly this kind of context boundary);
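A minimal sketch of this isolation pattern, using hypothetical simplified types (the real createSubagentContext in Claude Code carries far more state): shallow-copy the context, but clone every mutable field so that child writes never reach the parent.

```typescript
// Hypothetical, simplified context shape — not Claude Code's real definition.
interface ToolUseContext {
  model: string
  readFileState: Map<string, string> // mutable per-agent file cache
}

// Shallow-copy the context, then deep-copy mutable members so the
// child agent can mutate freely without touching the parent.
function createSubagentContext(
  parent: ToolUseContext,
  overrides: Partial<ToolUseContext> = {},
): ToolUseContext {
  return {
    ...parent,
    ...overrides,
    readFileState: new Map(parent.readFileState), // isolated clone
  }
}

const parent: ToolUseContext = {
  model: 'claude-sonnet',
  readFileState: new Map([['a.ts', 'v1']]),
}
const child = createSubagentContext(parent)
child.readFileState.set('a.ts', 'v2') // parent still sees 'v1'
```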

Reusing the static prompt from cacheSafeParams improves the cache hit rate;

Stream events such as message_delta are processed in real time through query's async iterator.
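The consumption pattern can be reduced to a self-contained sketch (the message shapes and fakeQuery generator below are hypothetical stand-ins, not Claude Code's real definitions): usage is accumulated only from message_delta stream events, while non-stream messages are collected as output.

```typescript
// Hypothetical, simplified stream-event shapes for illustration.
type StreamMessage =
  | { type: 'stream_event'; event: { type: string; usage?: { output_tokens: number } } }
  | { type: 'assistant'; text: string }

// Stand-in for the real query() async generator.
async function* fakeQuery(): AsyncGenerator<StreamMessage> {
  yield { type: 'stream_event', event: { type: 'message_delta', usage: { output_tokens: 7 } } }
  yield { type: 'assistant', text: 'done' }
  yield { type: 'stream_event', event: { type: 'message_delta', usage: { output_tokens: 5 } } }
}

// Same loop shape as above: usage comes only from message_delta
// events; everything else is pushed into the output list.
async function run() {
  let totalOutputTokens = 0
  const outputs: string[] = []
  for await (const msg of fakeQuery()) {
    if (msg.type === 'stream_event') {
      if (msg.event.type === 'message_delta' && msg.event.usage) {
        totalOutputTokens += msg.event.usage.output_tokens
      }
      continue // stream events are metadata, not conversation output
    }
    outputs.push(msg.text)
  }
  return { totalOutputTokens, outputs }
}
```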

Also, buildForkedMessages() in src/tools/AgentTool/forkSubagent.ts deliberately constructs a nearly identical prefix for every child, which further increases the cache hit rate;

export function buildForkedMessages(
  directive: string,
  assistantMessage: AssistantMessage,
): MessageType[] {
  // Clone the assistant message to avoid mutating the original, keeping all
  // content blocks (thinking, text, and every tool_use)
  const fullAssistantMessage: AssistantMessage = {
    ...assistantMessage,
    uuid: randomUUID(),
    message: {
      ...assistantMessage.message,
      content: [...assistantMessage.message.content],
    },
  }
  // Collect all tool_use blocks from the assistant message
  const toolUseBlocks = assistantMessage.message.content.filter(
    (block): block is BetaToolUseBlock => block.type === 'tool_use',
  )
  if (toolUseBlocks.length === 0) {
    logForDebugging(
      `No tool_use blocks found in assistant message for fork directive: ${directive.slice(0, 50)}...`,
      { level: 'error' },
    )
    return [
      createUserMessage({
        content: [
          { type: 'text' as const, text: buildChildMessage(directive) },
        ],
      }),
    ]
  }
  // Build tool_result blocks for every tool_use, all with identical placeholder text
  const toolResultBlocks = toolUseBlocks.map(block => ({
    type: 'tool_result' as const,
    tool_use_id: block.id,
    content: [
      {
        type: 'text' as const,
        text: FORK_PLACEHOLDER_RESULT,
      },
    ],
  }))
  // Build a single user message: all placeholder tool_results + the per-child directive
  // TODO(smoosh): this text sibling creates a [tool_result, text] pattern on the wire
  // (renders as </function_results>\n\nHuman:<text>). One-off per-child construction,
  // not a repeated teacher, so low-priority. If we ever care, use smooshIntoToolResult
  // from src/utils/messages.ts to fold the directive into the last tool_result.content.
  const toolResultMessage = createUserMessage({
    content: [
      ...toolResultBlocks,
      {
        type: 'text' as const,
        text: buildChildMessage(directive),
      },
    ],
  })
  return [fullAssistantMessage, toolResultMessage]
}

This converts the parent agent's assistant message (including its tool calls) into the child agent's initial conversation context: placeholder tool_results simulate tools that have already executed, so the child can continue reasoning on top of "hypothetical tool output".
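The placeholder pairing can be illustrated with a minimal sketch (the block types and the FORK_PLACEHOLDER_TEXT constant are hypothetical stand-ins for Claude Code's internals): every tool_use id in the parent's assistant turn gets a matching tool_result with identical placeholder text, so the API sees a fully paired, valid exchange.

```typescript
// Hypothetical stand-in for Claude Code's FORK_PLACEHOLDER_RESULT.
const FORK_PLACEHOLDER_TEXT = '(result elided for fork)'

// Simplified block shapes for illustration.
type ToolUse = { type: 'tool_use'; id: string; name: string }
type ToolResult = {
  type: 'tool_result'
  tool_use_id: string
  content: { type: 'text'; text: string }[]
}

// One placeholder tool_result per tool_use, all with identical text,
// keeping the child prefix uniform (and thus cache-friendly) across forks.
function buildPlaceholderResults(toolUses: ToolUse[]): ToolResult[] {
  return toolUses.map(block => ({
    type: 'tool_result' as const,
    tool_use_id: block.id,
    content: [{ type: 'text' as const, text: FORK_PLACEHOLDER_TEXT }],
  }))
}

const results = buildPlaceholderResults([
  { type: 'tool_use', id: 'tu_1', name: 'Read' },
  { type: 'tool_use', id: 'tu_2', name: 'Grep' },
])
// results[0] pairs with tu_1, results[1] with tu_2, identical placeholder text
```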

Finally, src/utils/agentContext uses AsyncLocalStorage to isolate concurrent agents; cache-aware context inheritance is pushed to an extreme throughout the codebase.
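A minimal Node.js sketch of this isolation technique (the agent-context shape and function names are hypothetical; only AsyncLocalStorage itself is the real Node API): each concurrent async flow sees only the context it was started with, even when many agents interleave on the event loop.

```typescript
import { AsyncLocalStorage } from 'node:async_hooks'

// Hypothetical per-agent context; the real store carries much more.
interface AgentContext { agentId: string }

const agentStorage = new AsyncLocalStorage<AgentContext>()

// Reads the ambient context; falls back to 'main' outside any agent.
function currentAgentId(): string {
  return agentStorage.getStore()?.agentId ?? 'main'
}

// Everything awaited inside run()'s callback inherits this agent's
// context — no cross-talk with other concurrently running agents.
async function runAgent(agentId: string): Promise<string> {
  return agentStorage.run({ agentId }, async () => {
    await new Promise(r => setTimeout(r, Math.random() * 10)) // force interleaving
    return currentAgentId() // still this agent's id after the await
  })
}

const ids = await Promise.all(['a1', 'a2', 'a3'].map(runAgent))
// ids preserves input order: ['a1', 'a2', 'a3']
```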

Final

Key points:

compact/session memory and resume/fork cache sharing

bye~