Claude Code token usage analyzer - breaks down usage by project, session, and subagent

This Python tool, the Claude Code token usage analyzer, parses the JSONL session files under `~/.claude/projects/` to analyze token usage patterns. It extracts token usage data (input, output, cache-creation, and cache-read tokens) from assistant messages, identifies human-originated prompts while filtering out tool results and sidechain messages, and supports filtering by date range through the `SINCE_DAYS` and `SINCE_DATE` environment variables. It also tracks session metadata, including agent IDs, session IDs, and start timestamps, and analyzes the subagent sessions stored alongside each parent session.

#!/usr/bin/env python3
"""
Claude Code token usage analyzer.

Analyzes ~/.claude/projects/ JSONL files for token usage patterns.
"""

import json
import os
import sys
from pathlib import Path
from collections import defaultdict
from datetime import datetime, timedelta, timezone

PROJECTS_DIR = Path.home() / ".claude" / "projects"
OUTPUT_DIR = Path.home() / "tuin" / "analysis" / "tokens"

# Filter: only include sessions that started within the last N days
# (None = all time). An unset or empty variable means "no filter".
SINCE_DAYS = int(os.environ.get("SINCE_DAYS") or 0) or None
SINCE_DATE = os.environ.get("SINCE_DATE")  # e.g. "2026-03-30"

# Counters read from the "usage" object of every assistant message.
_USAGE_KEYS = (
    "input_tokens",
    "cache_creation_input_tokens",
    "cache_read_input_tokens",
    "output_tokens",
)


def _as_count(value):
    """Return *value* when it is a real integer, otherwise 0."""
    if isinstance(value, int) and not isinstance(value, bool):
        return value
    return 0


def _message_of(obj):
    """Return ``obj["message"]`` when it is a dict, otherwise an empty dict."""
    message = obj.get("message")
    return message if isinstance(message, dict) else {}


def extract_text_content(content):
    """Extract text from message content (string or list).

    Args:
        content: A plain string, or a list whose items are strings or
            dicts with a "type" key.

    Returns:
        The string itself for string input; for a list, the "text" items
        and bare strings joined by newlines and stripped; "" otherwise.
    """
    if isinstance(content, str):
        return content
    if isinstance(content, list):
        parts = []
        for item in content:
            if isinstance(item, str):
                parts.append(item)
            elif isinstance(item, dict) and item.get("type") == "text":
                # Tool results and other item types are not user prompts,
                # so only "text" items contribute.
                text = item.get("text", "")
                if isinstance(text, str):
                    parts.append(text)
        return "\n".join(parts).strip()
    return ""


def is_human_prompt(msg_obj):
    """Check if this is a human-originated prompt (not a tool result).

    Returns:
        False when the message content is a list whose dict items are all
        of type "tool_result"; True in every other case.
    """
    content = _message_of(msg_obj).get("content", "")
    if isinstance(content, list):
        types = [item.get("type") for item in content if isinstance(item, dict)]
        if types and all(t == "tool_result" for t in types):
            return False
    return True


def parse_session(jsonl_path, is_subagent=False):
    """Parse a single JSONL session file.

    Token counters are summed over "assistant" records, and prompts are
    collected from "user" records that are neither tool results nor
    sidechain messages. Subagent files found in
    ``<session-stem>/subagents/*.jsonl`` next to the file are parsed
    recursively.

    Args:
        jsonl_path: Path to the session file.
        is_subagent: Stored verbatim in the result under "is_subagent".

    Returns:
        A dict describing the session, or None if the file cannot be read.
    """
    jsonl_path = Path(jsonl_path)
    usage_total = dict.fromkeys(_USAGE_KEYS, 0)
    prompts = []
    agent_id = None
    session_id = None
    timestamp_start = None

    try:
        # errors="replace" keeps one bad byte from discarding a session.
        with open(jsonl_path, encoding="utf-8", errors="replace") as f:
            for line in f:
                try:
                    obj = json.loads(line)
                except json.JSONDecodeError:
                    continue
                # A line can be valid JSON without being a record.
                if not isinstance(obj, dict):
                    continue

                ts = obj.get("timestamp")
                if ts and not timestamp_start and isinstance(ts, str):
                    timestamp_start = ts
                if not agent_id:
                    agent_id = obj.get("agentId")
                if not session_id:
                    session_id = obj.get("sessionId")

                msg_type = obj.get("type")
                if msg_type == "assistant":
                    usage = _message_of(obj).get("usage")
                    if isinstance(usage, dict):
                        for key in _USAGE_KEYS:
                            usage_total[key] += _as_count(usage.get(key))
                elif msg_type == "user":
                    text = extract_text_content(_message_of(obj).get("content", ""))
                    # Only capture actual human prompts
                    # (not tool results, not sidechain).
                    if (
                        text
                        and not obj.get("isSidechain", False)
                        and is_human_prompt(obj)
                        and obj.get("userType", "") != "tool"
                    ):
                        prompts.append({
                            "text": text,
                            "timestamp": obj.get("timestamp"),
                            "entrypoint": obj.get("entrypoint", ""),
                        })
    except OSError:
        return None

    # Check for subagent sessions stored next to the session file.
    subagent_sessions = []
    subagents_dir = jsonl_path.parent / jsonl_path.stem / "subagents"
    if subagents_dir.is_dir():
        # Sorted so repeated runs produce the same ordering.
        for sub_file in sorted(subagents_dir.glob("*.jsonl")):
            sub_data = parse_session(sub_file, is_subagent=True)
            if sub_data:
                sub_data["subagent_file"] = str(sub_file.name)
                subagent_sessions.append(sub_data)

    return {
        "file": str(jsonl_path),
        "session_id": session_id or jsonl_path.stem,
        "agent_id": agent_id,
        "is_subagent": is_subagent,
        "timestamp_start": timestamp_start,
        "usage": dict(usage_total),
        "total_tokens": sum(usage_total.values()),
        "prompts": prompts,
        "subagent_sessions": subagent_sessions,
    }


def get_project_name(project_dir_name):
    """Convert directory name to readable project name.

    Strips a leading "-Users-kieranklaassen-" (with or without the first
    dash). Falls back to the original name if nothing would be left.
    """
    name = project_dir_name
    for prefix in ("-Users-kieranklaassen-", "Users-kieranklaassen-"):
        if name.startswith(prefix):
            name = name[len(prefix):]
            break
    return name or project_dir_name


def get_cutoff():
    """Return a UTC-aware datetime cutoff, or None for all time.

    SINCE_DATE takes precedence over SINCE_DAYS. A SINCE_DATE without an
    offset is interpreted as UTC; one with an offset is converted to UTC.

    Raises:
        ValueError: If SINCE_DATE is not a valid ISO date/datetime.
    """
    if SINCE_DATE:
        parsed = datetime.fromisoformat(SINCE_DATE)
        if parsed.tzinfo is None:
            return parsed.replace(tzinfo=timezone.utc)
        return parsed.astimezone(timezone.utc)
    if SINCE_DAYS:
        return datetime.now(timezone.utc) - timedelta(days=SINCE_DAYS)
    return None


def session_in_range(session, cutoff):
    """Return True if the session started at or after *cutoff*.

    Sessions are kept (True) when there is no cutoff, no start timestamp,
    or the timestamp cannot be parsed. Timestamps and cutoffs without an
    offset are treated as UTC so the comparison never mixes naive and
    aware datetimes.
    """
    if not cutoff or not session["timestamp_start"]:
        return True
    ts_str = session["timestamp_start"]
    try:
        ts = datetime.fromisoformat(ts_str.replace("Z", "+00:00"))
    except (ValueError, TypeError, AttributeError):
        return True
    if ts.tzinfo is None:
        ts = ts.replace(tzinfo=timezone.utc)
    if cutoff.tzinfo is None:
        cutoff = cutoff.replace(tzinfo=timezone.utc)
    return ts >= cutoff


def analyze_all():
    """Analyze all projects and sessions.

    Returns:
        A mapping of project name to the list of parsed sessions that have
        at least one token and fall inside the configured date range. The
        mapping is empty when PROJECTS_DIR does not exist.
    """
    projects = defaultdict(list)
    cutoff = get_cutoff()
    if not PROJECTS_DIR.is_dir():
        print(f"Projects directory not found: {PROJECTS_DIR}", file=sys.stderr)
        return projects
    for project_dir in sorted(PROJECTS_DIR.iterdir()):
        if not project_dir.is_dir():
            continue
        project_name = get_project_name(project_dir.name)
        for jsonl_file in sorted(project_dir.glob("*.jsonl")):
            session = parse_session(jsonl_file)
            if (
                session
                and session["total_tokens"] > 0
                and session_in_range(session, cutoff)
            ):
                projects[project_name].append(session)
    return projects


def format_tokens(n):
    """Format token count with commas."""
    return f"{n:,}"


def summarize_projects(projects):
    """Build per-project summary, sorted by total tokens (descending)."""
    summaries = []
    for project_name, sessions in projects.items():
        total = defaultdict(int)
        all_subagent_tokens = 0
        subagent_count = 0
        for session in sessions:
            for key, value in session["usage"].items():
                total[key] += value
            for sub in session["subagent_sessions"]:
                all_subagent_tokens += sub["total_tokens"]
                subagent_count += 1
        summaries.append({
            "project": project_name,
            "sessions": len(sessions),
            "usage": dict(total),
            "total_tokens": sum(total.values()),
            "subagent_tokens": all_subagent_tokens,
            "subagent_count": subagent_count,
        })
    summaries.sort(key=lambda x: x["total_tokens"], reverse=True)
    return summaries


def find_costly_sessions(projects, top_n=20):
    """Find the most token-heavy sessions across all projects.

    Returns:
        Up to *top_n* ``(project_name, session)`` tuples, heaviest first.
    """
    all_sessions = [
        (project_name, session)
        for project_name, sessions in projects.items()
        for session in sessions
    ]
    all_sessions.sort(key=lambda x: x[1]["total_tokens"], reverse=True)
    return all_sessions[:top_n]


def find_costly_subagents(projects, top_n=20):
    """Find the most token-heavy subagent sessions.

    Returns:
        Up to *top_n* ``(project_name, parent_session_id, subagent)``
        tuples, heaviest first.
    """
    all_subs = [
        (project_name, session["session_id"], sub)
        for project_name, sessions in projects.items()
        for session in sessions
        for sub in session["subagent_sessions"]
    ]
    all_subs.sort(key=lambda x: x[2]["total_tokens"], reverse=True)
    return all_subs[:top_n]


def write_report(projects, summaries):
    """Write the main analysis report.

    Creates OUTPUT_DIR if needed and writes ``token_report.md`` (UTF-8).

    Returns:
        The path of the written report.
    """
    OUTPUT_DIR.mkdir(parents=True, exist_ok=True)
    report_path = OUTPUT_DIR / "token_report.md"

    lines = []
    cutoff = get_cutoff()
    date_range = f"Since {cutoff.strftime('%Y-%m-%d')}" if cutoff else "All time"
    lines.append("# Claude Code Token Usage Analysis")
    lines.append(
        f"\nGenerated: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}"
        f" | Range: {date_range}\n"
    )

    # Grand totals
    grand_input = sum(s["usage"].get("input_tokens", 0) for s in summaries)
    grand_cache_create = sum(
        s["usage"].get("cache_creation_input_tokens", 0) for s in summaries
    )
    grand_cache_read = sum(
        s["usage"].get("cache_read_input_tokens", 0) for s in summaries
    )
    grand_output = sum(s["usage"].get("output_tokens", 0) for s in summaries)
    grand_total = sum(s["total_tokens"] for s in summaries)
    total_sessions = sum(s["sessions"] for s in summaries)
    total_subagent_tokens = sum(s["subagent_tokens"] for s in summaries)
    total_subagent_count = sum(s["subagent_count"] for s in summaries)

    lines.append("## Grand Totals\n")
    lines.append(f"- **Projects**: {len(summaries)}")
    lines.append(f"- **Sessions**: {total_sessions:,}")
    lines.append(f"- **Total tokens**: {format_tokens(grand_total)}")
    lines.append(f"  - Input: {format_tokens(grand_input)}")
    lines.append(f"  - Cache creation: {format_tokens(grand_cache_create)}")
    lines.append(f"  - Cache read: {format_tokens(grand_cache_read)}")
    lines.append(f"  - Output: {format_tokens(grand_output)}")
    lines.append(
        f"- **Subagent sessions**: {total_subagent_count:,}"
        f" ({format_tokens(total_subagent_tokens)} tokens)"
    )
    lines.append("")

    # Per-project breakdown
    lines.append("## By Project\n")
    lines.append(
        "| Project | Sessions | Total | Input | Cache Create | Cache Read"
        " | Output | Subagents |"
    )
    lines.append(
        "|---------|----------|-------|-------|--------------|------------"
        "|--------|-----------|"
    )
    for s in summaries:
        u = s["usage"]
        lines.append(
            f"| {s['project']} | {s['sessions']} "
            f"| {format_tokens(s['total_tokens'])} "
            f"| {format_tokens(u.get('input_tokens', 0))} "
            f"| {format_tokens(u.get('cache_creation_input_tokens', 0))} "
            f"| {format_tokens(u.get('cache_read_input_tokens', 0))} "
            f"| {format_tokens(u.get('output_tokens', 0))} "
            f"| {s['subagent_count']} ({format_tokens(s['subagent_tokens'])}) |"
        )
    lines.append("")

    # Most costly sessions
    lines.append("## Most Costly Sessions\n")
    costly = find_costly_sessions(projects, top_n=25)
    for i, (proj, session) in enumerate(costly, 1):
        lines.append(
            f"### {i}. {proj} — {format_tokens(session['total_tokens'])} tokens"
        )
        lines.append(f"- **Session**: `{session['session_id']}`")
        if session["timestamp_start"]:
            started = session["timestamp_start"][:19].replace("T", " ")
            lines.append(f"- **Started**: {started}")
        u = session["usage"]
        lines.append(
            f"- **Tokens**: input={format_tokens(u.get('input_tokens', 0))}, "
            f"cache_create={format_tokens(u.get('cache_creation_input_tokens', 0))}, "
            f"cache_read={format_tokens(u.get('cache_read_input_tokens', 0))}, "
            f"output={format_tokens(u.get('output_tokens', 0))}"
        )
        lines.append(
            f"- **Subagents in session**: {len(session['subagent_sessions'])}"
        )
        if session["prompts"]:
            lines.append("- **First prompt**:")
            # Flatten to a single line so the quote stays one list item.
            first = session["prompts"][0]["text"][:400].replace("\n", " ")
            lines.append(f"  > {first}")
        lines.append("")

    # Most costly subagents
    lines.append("## Most Costly Subagents\n")
    costly_subs = find_costly_subagents(projects, top_n=20)
    lines.append(
        "| # | Project | Parent Session | Subagent File | Total Tokens"
        " | Input | Output |"
    )
    lines.append(
        "|---|---------|----------------|---------------|--------------"
        "|-------|--------|"
    )
    for i, (proj, session_id, sub) in enumerate(costly_subs, 1):
        u = sub["usage"]
        # The "Input" column combines plain, cache-creation and cache-read.
        sub_input = (
            u.get("input_tokens", 0)
            + u.get("cache_creation_input_tokens", 0)
            + u.get("cache_read_input_tokens", 0)
        )
        lines.append(
            f"| {i} | {proj} | {session_id[:8]}... "
            f"| {sub.get('subagent_file', '?')} "
            f"| {format_tokens(sub['total_tokens'])} "
            f"| {format_tokens(sub_input)} "
            f"| {format_tokens(u.get('output_tokens', 0))} |"
        )
    lines.append("")

    # Subagent usage by project
    lines.append("## Subagent Usage by Project\n")
    proj_sub_stats = []
    for proj_name, sessions in projects.items():
        sub_tokens = sum(
            sub["total_tokens"] for s in sessions for sub in s["subagent_sessions"]
        )
        sub_count = sum(len(s["subagent_sessions"]) for s in sessions)
        if sub_count > 0:
            proj_sub_stats.append((proj_name, sub_count, sub_tokens))
    proj_sub_stats.sort(key=lambda x: x[2], reverse=True)
    lines.append("| Project | Subagent Sessions | Subagent Tokens |")
    lines.append("|---------|-------------------|-----------------|")
    for proj_name, count, tokens in proj_sub_stats:
        lines.append(f"| {proj_name} | {count} | {format_tokens(tokens)} |")
    lines.append("")

    # Explicit UTF-8: the report contains non-ASCII text (em dash, prompts).
    with open(report_path, "w", encoding="utf-8") as f:
        f.write("\n".join(lines))
    print(f"Report written: {report_path}")
    return report_path


def write_prompts_by_project(projects):
    """Write all user prompts for each project to separate files.

    One Markdown file per project with at least one prompt is written to
    ``OUTPUT_DIR / "prompts"``, with prompts ordered by timestamp.
    """
    prompts_dir = OUTPUT_DIR / "prompts"
    prompts_dir.mkdir(parents=True, exist_ok=True)

    for project_name, sessions in projects.items():
        # Collect all prompts across all sessions.
        all_prompts = [
            {
                "session_id": session["session_id"],
                "timestamp": prompt.get("timestamp", ""),
                "entrypoint": prompt.get("entrypoint", ""),
                "text": prompt["text"],
            }
            for session in sessions
            for prompt in session["prompts"]
        ]
        if not all_prompts:
            continue

        # Missing timestamps sort first.
        all_prompts.sort(key=lambda x: x["timestamp"] or "")

        # Safe filename
        safe_name = project_name.replace("/", "_").replace(" ", "_")[:80]
        out_path = prompts_dir / f"{safe_name}.md"

        lines = []
        lines.append(f"# Prompts: {project_name}")
        lines.append(
            f"\n{len(all_prompts)} prompts across {len(sessions)} sessions\n"
        )
        for i, p in enumerate(all_prompts, 1):
            if p["timestamp"]:
                ts = p["timestamp"][:19].replace("T", " ")
            else:
                ts = "unknown"
            lines.append(f"## {i}. {ts} (Session {p['session_id'][:8]})")
            if p["entrypoint"]:
                lines.append(f"*entrypoint: {p['entrypoint']}*")
            lines.append("")
            lines.append(p["text"])
            lines.append("")

        # Prompts are arbitrary user text, so write UTF-8 explicitly.
        with open(out_path, "w", encoding="utf-8") as f:
            f.write("\n".join(lines))

    print(f"Prompt files written to: {prompts_dir}")


def print_summary(summaries, projects):
    """Print a quick summary to stdout."""
    grand_total = sum(s["total_tokens"] for s in summaries)
    total_sessions = sum(s["sessions"] for s in summaries)
    print(
        f"\nTotal: {format_tokens(grand_total)} tokens across "
        f"{total_sessions} sessions in {len(summaries)} projects\n"
    )
    print(f"{'Project':<50} {'Sessions':>8} {'Total Tokens':>14} {'Subagents':>10}")
    print("-" * 86)
    for s in summaries[:30]:
        print(
            f"{s['project']:<50} {s['sessions']:>8,} "
            f"{format_tokens(s['total_tokens']):>14} {s['subagent_count']:>10,}"
        )

    print("\nTop 10 costliest sessions:")
    for proj, session in find_costly_sessions(projects, top_n=10):
        ts = session["timestamp_start"][:10] if session["timestamp_start"] else "?"
        first_prompt = ""
        if session["prompts"]:
            first_prompt = session["prompts"][0]["text"][:80].replace("\n", " ")
        print(
            f"  [{ts}] {proj}: {format_tokens(session['total_tokens'])}"
            f" — {first_prompt}"
        )


def main():
    """Scan all projects, print the summary, and write the report files."""
    print("Scanning projects...")
    projects = analyze_all()
    print(f"Found {len(projects)} projects")
    summaries = summarize_projects(projects)
    print_summary(summaries, projects)
    report_path = write_report(projects, summaries)
    write_prompts_by_project(projects)
    print(f"\nFull report: {report_path}")
    print(f"Prompts: {OUTPUT_DIR}/prompts/")


if __name__ == "__main__":
    main()