{
  "id": "token-burn",
  "asOf": "2026-06-19",
  "humanRoute": "/solutions/token-burn",
  "slug": "token-burn",
  "human": "/solutions/token-burn",
  "json": "/solutions/token-burn.json",
  "title": "Token / API budget burn",
  "agentTitle": "Treat budgets as runtime resources, not vibes",
  "tagline": "The retry loop does not know it is on fire.",
  "severity": "high",
  "symptoms": [
    "Spend climbs far faster than work completed; a small task runs up a large bill.",
    "The same static context (long system prompt, AGENTS.md, tool schemas) is resent on every single call.",
    "Retries and parallel sub-agents fan out with no shared cap, so one stuck branch bills forever."
  ],
  "rootCauses": [
    "Static files are injected into every call instead of cached or retrieved on demand.",
    "Budgets are tracked as ad hoc counters that get duplicated across delegation and parallel branches, so no one owns the total.",
    "Budget exhaustion triggers another model call asking what to do, instead of a hard stop."
  ],
  "firstSafeMove": "Put a hard per-run cap on tokens, retries, tool calls, wall-clock, and parallel branches outside the model; on exhaustion, stop with a typed state rather than asking the model.",
  "fixPattern": [
    "Keep stable policy and tool schemas in a cacheable prefix; move volatile task material later so dynamic results do not invalidate the cache.",
    "Retrieve large static files on demand instead of injecting them every turn.",
    "Give the budget a single owner that survives recursive delegation and fan-out, so sub-agents draw from one account, not many."
  ],
  "requiredReceipts": [
    "per-run caps declared: tokens, retries, tool calls, wall-clock, branches",
    "cache-prefix separation in place (stable vs volatile)",
    "a typed stop state on budget exhaustion (not another model call)"
  ],
  "agentInstruction": "Read this page. Inspect the run's token shape. Return: (1) where static context is being resent, (2) which caps are missing, (3) the single budget owner, and (4) the stop state on exhaustion. Do not start work until the per-run caps exist.",
  "relatedRoutes": [
    "/architecture",
    "/build",
    "/tools"
  ],
  "lastReviewed": "2026-06-19"
}
